Repository: n8willis/opentype-shaping-documents
Branch: master
Commit: 8b6e9924c4ad
Files: 132
Total size: 2.5 MB

Directory structure:
gitextract_xxlse0rw/

├── .github/
│   └── workflows/
│       ├── build_html_output.yml
│       └── test_document_sources.yml
├── .gitignore
├── BUILD.md
├── Makefile
├── README.md
├── _ext/
│   ├── LICENSE.md
│   ├── README.md
│   ├── abbreviations.py
│   ├── colors.py
│   └── shapingdocs_svg_color_toggles.py
├── _global.md
├── _static/
│   ├── LICENSES_FOR_INCORPORATED_SOFTWARE.txt
│   ├── custom.css
│   ├── fonts/
│   │   ├── Source_Code_Pro/
│   │   │   ├── OFL.txt
│   │   │   └── README.txt
│   │   ├── Source_Sans_3/
│   │   │   ├── OFL.txt
│   │   │   └── README.txt
│   │   └── Source_Serif_4/
│   │       ├── OFL.txt
│   │       └── README.txt
│   ├── fontsizes.html
│   └── toggleSvgColors.js
├── _templates/
│   ├── layout.html
│   └── static_nav.html
├── _toc.yml
├── build-requirements.txt
├── character-tables/
│   ├── README.md
│   ├── character-tables-arabic.md
│   ├── character-tables-bengali.md
│   ├── character-tables-devanagari.md
│   ├── character-tables-gujarati.md
│   ├── character-tables-gurmukhi.md
│   ├── character-tables-hangul.md
│   ├── character-tables-hebrew.md
│   ├── character-tables-kannada.md
│   ├── character-tables-khmer.md
│   ├── character-tables-lao.md
│   ├── character-tables-malayalam.md
│   ├── character-tables-mongolian.md
│   ├── character-tables-myanmar.md
│   ├── character-tables-nko.md
│   ├── character-tables-oriya.md
│   ├── character-tables-sinhala.md
│   ├── character-tables-syriac.md
│   ├── character-tables-tamil.md
│   ├── character-tables-telugu.md
│   ├── character-tables-thai.md
│   ├── character-tables-tibetan.md
│   └── index.md
├── conf.py
├── errata.md
├── images/
│   ├── arabic/
│   │   ├── arabic-png-image-generation-log.md
│   │   └── arabic-svg-image-generation-log.md
│   ├── bengali/
│   │   ├── bengali-png-image-generation-log.md
│   │   └── bengali-svg-image-generation-log.md
│   ├── devanagari/
│   │   ├── devanagari-png-image-generation-log.md
│   │   └── devanagari-svg-image-generation-log.md
│   ├── emoji/
│   │   └── emoji-png-image-generation-log.md
│   ├── example-fonts.txt
│   ├── gujarati/
│   │   ├── gujarati-png-image-generation-log.md
│   │   └── gujarati-svg-image-generation-log.md
│   ├── gurmukhi/
│   │   ├── gurmukhi-png-image-generation-log.md
│   │   └── gurmukhi-svg-image-generation-log.md
│   ├── hangul/
│   │   ├── hangul-png-image-generation-log.md
│   │   └── hangul-svg-image-generation-log.md
│   ├── hebrew/
│   │   ├── hebrew-png-image-generation-log.md
│   │   └── hebrew-svg-image-generation-log.md
│   ├── images-index.md
│   ├── kannada/
│   │   ├── kannada-png-image-generation-log.md
│   │   └── kannada-svg-image-generation-log.md
│   ├── khmer/
│   │   ├── khmer-png-image-generation-log.md
│   │   └── khmer-svg-image-generation-log.md
│   ├── malayalam/
│   │   ├── malayalam-png-image-generation-log.md
│   │   └── malayalam-svg-image-generation-log.md
│   ├── mongolian/
│   │   ├── mongolian-png-image-generation-log.md
│   │   └── mongolian-svg-image-generation-log.md
│   ├── myanmar/
│   │   ├── myanmar-png-image-generation-log.md
│   │   └── myanmar-svg-image-generation-log.md
│   ├── nko/
│   │   ├── nko-png-image-generation-log.md
│   │   └── nko-svg-image-generation-log.md
│   ├── oriya/
│   │   ├── oriya-png-image-generation-log.md
│   │   └── oriya-svg-image-generation-log.md
│   ├── sinhala/
│   │   ├── sinhala-png-image-generation-log.md
│   │   └── sinhala-svg-image-generation-log.md
│   ├── syriac/
│   │   ├── syriac-png-image-generation-log.md
│   │   └── syriac-svg-image-generation-log.md
│   ├── tamil/
│   │   ├── tamil-png-image-generation-log.md
│   │   └── tamil-svg-image-generation-log.md
│   ├── telugu/
│   │   ├── telugu-png-image-generation-log.md
│   │   └── telugu-svg-image-generation-log.md
│   ├── thai-lao/
│   │   ├── thai-lao-png-image-generation-log.md
│   │   └── thai-lao-svg-image-generation-log.md
│   └── tibetan/
│       ├── tibetan-png-image-generation-log.md
│       └── tibetan-svg-image-generation-log.md
├── index.md
├── make.bat
├── notes/
│   ├── README.md
│   ├── emoji-implementation.md
│   ├── index.md
│   ├── ragel-machine-notation.md
│   └── uniscribe-bug-compatibility.md
├── opentype-shaping-arabic-general.md
├── opentype-shaping-arabic.md
├── opentype-shaping-bengali.md
├── opentype-shaping-default.md
├── opentype-shaping-devanagari.md
├── opentype-shaping-emoji.md
├── opentype-shaping-gujarati.md
├── opentype-shaping-gurmukhi.md
├── opentype-shaping-hangul.md
├── opentype-shaping-hebrew.md
├── opentype-shaping-indic-general.md
├── opentype-shaping-kannada.md
├── opentype-shaping-khmer.md
├── opentype-shaping-malayalam.md
├── opentype-shaping-mongolian.md
├── opentype-shaping-myanmar.md
├── opentype-shaping-nko.md
├── opentype-shaping-normalization.md
├── opentype-shaping-oriya.md
├── opentype-shaping-sinhala.md
├── opentype-shaping-syriac.md
├── opentype-shaping-tamil.md
├── opentype-shaping-telugu.md
├── opentype-shaping-thai-lao.md
├── opentype-shaping-tibetan.md
├── opentype-shaping-use.md
├── opentype-shaping-vedic-extensions.md
├── overview.md
└── test/
    ├── spellcheck.yml
    ├── spellcheck_html.yml
    └── wordlist.txt

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/build_html_output.yml
================================================
# Workflow: build the static-HTML output with Sphinx on every push and
# upload the result as a workflow artifact.
name: "Build HTML output"
on:
- push

jobs:
  html:
    runs-on: ubuntu-latest
    steps:
      # Fetch the repository contents.
      - uses: actions/checkout@v6
      - name: Set up Python environment
        uses: actions/setup-python@v6
        with: 
          python-version: "3.10"
      - name: Update pip
        run: |
          python -m pip install --upgrade pip
      # Install the pinned build packages, but only if the requirements
      # file is present in the checkout.
      - name: Install dependencies
        run: |
          if [ -f build-requirements.txt ]; then pip install -r build-requirements.txt; fi
      # The Sphinx project lives in the repository root, hence docs-folder ".".
      - name: Build HTML
        uses: rickstaa/sphinx-action@master
        with:
          docs-folder: "."
      # Publish the generated HTML tree as an artifact named ShapingDocumentsHTML.
      - uses: actions/upload-artifact@v6
        with:
          name: ShapingDocumentsHTML
          path: _build/html/


================================================
FILE: .github/workflows/test_document_sources.yml
================================================
# Workflow: run Sphinx's link checker over the document sources on every push.
name: "Test document sources"
on:
- push

jobs:
  linkcheck:
    runs-on: ubuntu-latest
    steps:
      # Fetch the repository contents.
      - uses: actions/checkout@v6
      - name: Set up Python environment
        uses: actions/setup-python@v6
        with: 
          python-version: "3.10"
      - name: Update pip
        run: |
          python -m pip install --upgrade pip
      # Install the pinned build packages, but only if the requirements
      # file is present in the checkout.
      - name: Install dependencies
        run: |
          if [ -f build-requirements.txt ]; then pip install -r build-requirements.txt; fi
      # Same Sphinx invocation as the Makefile's "linktest" target:
      # the "linkcheck" builder, run from the repository root into _build.
      - name: Check links
        uses: rickstaa/sphinx-action@master
        with:
          docs-folder: "."
          build-command: "sphinx-build -M linkcheck . _build"


================================================
FILE: .gitignore
================================================
# Ignore general
*~

# Ignore Unicode PDFs & misc resources
reference/

# Ignore Sphinx build output
_build/

# Ignore auto-generated binary spelling dictionary
dictionary.dic


================================================
FILE: BUILD.md
================================================
# Building a local copy of these documents #

A local, static-HTML version of these documents can be built with the
[Sphinx](https://www.sphinx-doc.org/) generator.

The Sphinx-related files in the repository are:
```
conf.py
index.md
make.bat
Makefile
```

plus the directories
```
_build/
_static/
_templates/
_ext/
```

## Sphinx ##

Sphinx is a Python-based utility that you will need to install on your
local machine. The official [installation
guide](https://www.sphinx-doc.org/en/master/usage/quickstart.html)
covers what is necessary for a variety of OSes and
environments. Perhaps the easiest approach is to install Sphinx in a
Python [virtual
environment](https://www.sphinx-doc.org/en/master/usage/installation.html#using-virtual-environments). 

After installing Sphinx itself, you will also need to install the
[MyST-Parser](https://myst-parser.readthedocs.io/en/latest/) package,
which enables Sphinx to process Markdown files. Using the
virtual-environment installation method, you can keep both of these
packages contained for this project.

At the moment there are three other dependencies involved, all of which
are Sphinx-extension packages:

1. [Sphinx-multitoc-numbering](https://sphinx-multitoc-numbering.readthedocs.io/),
   which is required to make Sphinx use a continuous numbering scheme
   across the files

2. [sphinx External TOC](https://sphinx-external-toc.readthedocs.io/),
   which is required to define the navigation sidebar declaratively
   (see the [TOCtrees](#toctrees) subsection below for more info)

3. [sphinx inline svg](https://pypi.org/project/sphinx-inline-svg/),
   which is used to implement the user-togglable cluster colors on the
   illustrations of feature application.

but a full `build-requirements.txt` file is included in the repository
that lists the packages in the author's virtual environment. You
shouldn't _need_ to utilize it, since just installing Sphinx,
MyST-Parser, and the extensions ought to suffice, but it is there if
required.

The build also uses a custom Sphinx extension, named
`shapingdocs_svg_color_toggles`, to generate the HTML elements used to
do the color toggling. This extension is included in the `_ext/`
directory of this repository and is called from that location,
however, so you do not need to install it separately.


## Building HTML documents ##

With the Sphinx and MyST-Parser packages installed, go to the
top-level directory of this repository in a shell or
terminal. Building the HTML documents should only take two steps:

1. Run `make clean` to clear out all temporary files from the previous
   run. Do this every time.

2. Run `make html` to regenerate the HTML files. The output files will
   be written to the `_build/html/` subdirectory.


## Test suite ##

The `test/` directory contains test elements. At the moment, all of
the tests are run manually, but as the kinks are worked out they may
be rolled into either Git hooks or GitHub Actions, so it is advisable
to start using them. Currently there are two tests available as
Makefile targets: 

1. Run `make linktest` to run checks on all of the URLs found in the
   documents. 
   
2. Run `make spellcheck` to run spellchecking on the documents.

You can also run `make test` to run both tests in sequence.

The spell-checking is configured in `test/spellcheck.yml`. It uses
the [PySpelling](https://facelessuser.github.io/pyspelling/) package
with the custom wordlist at `test/wordlist.txt`.

There are a few lingering peculiarities to PySpelling (most notably,
it supports excluding specified HTML elements, but cannot exclude
`<table>` elements because of a discrepancy between its built-in
Markdown converter and Sphinx, so at present the `character-tables/`
directory returns a great many false hits from the Unicode codepoint
names in the tables). The plan is to iron those out, then run both the
spell-checking and link-checking tests automatically on all pull
requests. When that is implemented, it will be documented here.


## Editing and bugfixing ##

The static-HTML version of the docs is a work in progress at the
moment, so please do poke around for problems and report any bugs.

Basic Sphinx configuration is done in the `conf.py` file.

The HTML output documents are currently using the "Alabaster" theme,
which comes preinstalled. The Alabaster theme accepts several
configuration options which are also kept in `conf.py`. Output
customization for the theme is also tweaked in the `custom.css` file
in the `_static/` subdirectory (just be sure you edit the one in
`_static/` itself; whenever the docs are rebuilt with `make`, that
file also gets copied into `_build/html/_static/`, so don't edit that
copy of the file since it gets overwritten).

To report a suspected typo or to suggest a general wording change,
please first synchronize your local repository with `git pull`. Then
do a `make clean`/`make html` as described in the section above.


### TOCtrees ###

Sphinx, by default, is hardcoded in a way that requires all documents
to be referenced in a separate, Sphinx-specific `toctree` structure,
after which the navigation sidebars (and other elements) are generated
on-the-fly at build-time by Sphinx itself.

The current documents are using a third-party extension that defines
the "TOCtree" in a declarative YAML file instead, to work around some
undesirable outputs -- mainly in the GitHub repository views -- that
Sphinx triggers with its on-the-fly `toctree` process.

But this approach isn't (yet?) perfect. Some files (namely this one,
`BUILD.md`, and the image-generation-log files) are manually excluded
from the build process so that they do not generate a flurry of
warning messages. That's deliberate, because the build instructions
and log files are metadata and aren't part of the final documentation
set itself.


================================================
FILE: Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build
PYSPELLING    = pyspelling
PYSPELLINGMARKDOWNCONF = test/spellcheck.yml
PYSPELLINGHTMLCONF = test/spellcheck_html.yml

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# None of these targets produces a file named after itself, so declare
# them all phony. This matters most for "test": it has no recipe of its
# own and a test/ directory exists in the repository, so without this
# declaration make's implicit-rule search would match it against the
# catch-all "%" rule below and hand "test" to sphinx-build as a builder
# name after the two real tests had run.
.PHONY: help test linktest spellcheck htmlspellcheck Makefile

# Run tests on links and spelling
test: linktest spellcheck

# Use Sphinx's built-in link checker
linktest:
	@$(SPHINXBUILD) -M linkcheck "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# Use PySpelling on the Markdown sources
spellcheck:
	@$(PYSPELLING) -c "$(PYSPELLINGMARKDOWNCONF)"

# Use PySpelling with the HTML configuration
htmlspellcheck:
	@$(PYSPELLING) -c "$(PYSPELLINGHTMLCONF)"

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: README.md
================================================
# OpenType shaping documents #

Sponsored by [YesLogic](https://yeslogic.com/) 

_<aside>Thanks also to the developers of HarfBuzz and AllSorts, plus many other font engineers and text-encoding experts for their generosity of time and insightful contributions.</aside>_

## &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&#127366; &#127344; &#127361; &#127357; &#127352; &#127357; &#127350; ##
>
> This repository is an active WORK IN PROGRESS.
>
> NONE of the documents you currently see here are complete
> nor are they suitable for reference. PLEASE do not use
> them as a guide or as a general information source.
>
> As long as this warning text remains visible, the above 
> holds true. 

These documents are meant to provide a functional specification for
text shaping. The expectation is that an implementer of this
specification will be using fonts in the OpenType font format applied
to input text that complies with Unicode.

At present, we are seeking comments and bugfixes. Interested readers
and contributors can begin at the

  - Indic Model ([general information](opentype-shaping-indic-general.md))
    - Scripts covered: [Devanagari](opentype-shaping-devanagari.md), [Bengali](opentype-shaping-bengali.md), [Gujarati](opentype-shaping-gujarati.md), [Gurmukhi](opentype-shaping-gurmukhi.md), [Kannada](opentype-shaping-kannada.md), [Malayalam](opentype-shaping-malayalam.md),
      [Oriya](opentype-shaping-oriya.md), [Tamil](opentype-shaping-tamil.md), [Telugu](opentype-shaping-telugu.md), [Sinhala](opentype-shaping-sinhala.md)
  - Arabic Model ([general information](opentype-shaping-arabic-general.md))
    - Scripts covered: [Arabic](opentype-shaping-arabic.md), [N'Ko](opentype-shaping-nko.md), [Syriac](opentype-shaping-syriac.md), [Mongolian](opentype-shaping-mongolian.md)
  - [Hangul](opentype-shaping-hangul.md)
  - [Hebrew](opentype-shaping-hebrew.md)
  - [Khmer](opentype-shaping-khmer.md)
  - [Myanmar](opentype-shaping-myanmar.md)
  - [Thai and Lao](opentype-shaping-thai-lao.md)
  - [Tibetan](opentype-shaping-tibetan.md)
  - [Universal Shaping Engine (<abbr>USE</abbr>)](opentype-shaping-use.md)
    - All complex scripts that are not handled by a dedicated
      script-specific shaping model
  - [Default](opentype-shaping-default.md)
    - All non-complex scripts
  - [Emoji](opentype-shaping-emoji.md)
    - Emoji sequences do not constitute a separate shaping model,
      but handling emoji sequences can incorporate many of the same
      OpenType mechanisms and should not be overlooked
  
shaping documents and are encouraged to submit their feedback
on the text or images of any of the linked scripts. The documents are
organized by script; where there are multiple shaping models for a
particular script (including deprecated models), the various models are
all addressed in the same script-specific document.

The documents also include a description of
[normalization](opentype-shaping-normalization.md) in the OpenType
shaping context, which differs from Unicode normalization in several
respects.

Various [notes](notes/README.md) about the document set and the details
of its scope, limitations, and quirks are also provided.

Some [errata](errata.md) about the "upstream" specifications and
reference documents are noted separately. 

In its final form, this repository will hold documentation describing
the shaping behavior used for layout of OpenType text. In particular,
it will focus on complex scripts.

In addition to the primary, per-script documents, implementers and
other interested readers are encouraged to check the
[character tables](character-tables/README.md) for correctness and to
examine the [image-generation logs](images/images-index.md) to identify
issues seen in the inline images.

### References

These documents cite the following informative references:

1. The Microsoft [Script development
   specifications](https://docs.microsoft.com/en-us/typography/script-development/standard),
   which document the behaviors expected for OpenType Layout fonts and
   provide guidance &amp; examples for type designers. OpenType is a
   registered trademark of Microsoft Corporation. 
2. Related portions of the Microsoft OpenType specification, such as the
   [OpenType Layout tag
   registry](https://docs.microsoft.com/en-us/typography/opentype/spec/ttoreg)
   and [OpenType Layout common table
   formats](https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2),
   which list and define feature tags, script &amp; language tags, and
   other internals of compliant OpenType font binaries. OpenType is a
   registered trademark of Microsoft Corporation. 
3. The [HarfBuzz](https://github.com/harfbuzz/harfbuzz) project, which
   includes a free-software/open-source implementation of OpenType
   Layout shaping with full source code and documentation. 
4. The [AllSorts](https://github.com/yeslogic/allsorts) project, which
   includes a free-software/open-source implementation of OpenType
   Layout shaping with full source code and documentation.
5. The [Unicode
   Standard](http://www.unicode.org/standard/standard.html) and
   related Unicode Consortium projects such as the [Unicode Character
   Database](http://www.unicode.org/reports/tr44/), which defines
   Unicode code points and formal character properties used in
   shaping. Unicode and the Unicode Logo are registered trademarks of
   Unicode, Inc. in the United States and other countries.
6. The YesLogic [text corpus](https://github.com/yeslogic/corpus),
   which includes real-world text data for several Indic scripts,
   scraped from Wikipedia, Reddit, and multiple online news
   sources. This data is used to test shaping in AllSorts and Prince.
7. Known but unofficial information about other shaping-engine
   projects. Primarily this includes tests and reproducible issues
   found via [HarfBuzz](https://github.com/harfbuzz/harfbuzz), because
   HarfBuzz intentionally aims to produce results that will 100% match
   the output of Microsoft Uniscribe (not counting cases where
   Uniscribe's output is known to be incorrect, of course).
   > Note: occasionally, tests or issues documenting the behavior of
   > Apple CoreText are also included, but CoreText compatibility is
   > not an explicit goal for HarfBuzz.
   

================================================
FILE: _ext/LICENSE.md
================================================
# License for shapingdocs Sphinx extension software

Unless otherwise indicated, all code in this directory is licensed
under the two-clause BSD license below. This license does _not_ apply
to code or other files found in parent, sibling, and other directories
within this repository.

Copyright 2025 Nathan Willis.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: _ext/README.md
================================================
# Sphinx extensions and related build tools

This directory holds custom extensions used by the Sphinx builder for
this set of documents, and a few somewhat-related Python utilities.

The contents of this directory are licensed under the 2-Clause BSD
license. This is the license used by the Sphinx project, and was
chosen here in order to maximize compatibility.

See the accompanying [LICENSE](LICENSE.md) file.

Note that the license does **not** apply to this documentation project
as a whole, but only to the contents of this directory.


================================================
FILE: _ext/abbreviations.py
================================================
# SPDX-FileCopyrightText: Copyright 2025 Nathan Willis
#
# SPDX-License-Identifier: BSD-2-Clause
"""Dictionary of the acronyms or abbreviations and corresponding full-text expansions used in the documents.
"""

# Maps each acronym or abbreviation that gets wrapped in an <abbr> tag
# in the generated output (the key) to its full-text expansion (the
# value).
#
# The keys are the strings that were used to identify and tag acronyms
# with <abbr> in the document source. The full expansions are also
# used in some, but not all, cases.
ABBR_STRING_MAP = {
    "AAT": "Apple Advanced Typography",
    "AJT": "Arabic Joining Type",
    "AMTRA": "Arabic Mark Transient Reordering Algorithm",
    "Ccc": "Canonical Combining Class",
    "CEK": "Combining Enclosing Keycap",
    "CGJ": "Combining Grapheme Joiner",
    "CLDR": "Common Locale Data Repository",
    "CSS": "Cascading Style Sheets",
    "GDEF": "Glyph Definition table",
    "GPOS": "Glyph Positioning table",
    "GSUB": "Glyph Substitution table",
    "LRM": "Left-to-Right Mark",
    "LTR": "Left-To-Right",
    "MCM": "Modifier Combining Mark",
    "NBSP": "No-Break Space",
    "NFC": "Normalization Form C",
    "NFD": "Normalization Form D",
    "NFKC": "Normalization Form KC",
    "NFKD": "Normalization Form KD",
    "PNG": "Portable Network Graphics",
    "PUA": "Private Use Area",
    "RGI": "Recommended for General Interchange",
    "RLM": "Right-to-Left Mark",
    "RTL": "Right-To-Left",
    "SHA": "Secure Hash Algorithm",
    "SVG": "Scalable Vector Graphics",
    "UCD": "Unicode Character Database",
    "UCDM": "Unicode Character Decomposition Mapping",
    "UGC": "Unicode General Category",
    "UIPC": "Unicode Indic Positional Category",
    "UISC": "Unicode Indic Syllabic Category",
    "UJT": "Unicode Joining Type",
    "URL": "Uniform Resource Locator",
    "USE": "Universal Shaping Engine",
    "ZWJ": "Zero-Width Joiner",
    "ZWNJ": "Zero-Width Non Joiner",
}


================================================
FILE: _ext/colors.py
================================================
# SPDX-FileCopyrightText: Copyright 2025 Nathan Willis
#
# SPDX-License-Identifier: BSD-2-Clause
"""The sequence of #RRGGBB colors used in the colorized SVG illustration images.
"""

# Ordered palette used to colorize clusters in the SVG illustration
# images, given as lowercase "#rrggbb" strings.
#
# The values follow the G10 sequence employed by Plotly, as visible at:
# https://plotly.com/python/discrete-color/#color-sequences-in-plotly-express
#
# That sequence was chosen because it is generally consistent in value
# and it does not include any greys.
COLOR_LIST = [
    "#3366cc",
    "#dc3912",
    "#ff9900",
    "#109618",
    "#990099",
    "#0099c6",
    "#dd4477",
    "#66aa00",
    "#b82e2e",
    "#316395",
]


================================================
FILE: _ext/shapingdocs_svg_color_toggles.py
================================================
# SPDX-FileCopyrightText: Copyright 2025 Nathan Willis
#
# SPDX-License-Identifier: BSD-2-Clause

"""Sphinx extension to attach a color-toggle button to specified SVG elements in documents.

   This extension only affects the `html` builder.
"""

from __future__ import annotations

from docutils import nodes

from docutils.parsers.rst import directives

from sphinx.util import logging

from sphinx.application import Sphinx
from sphinx.util.docutils import SphinxDirective, SphinxTranslator, SphinxRole
#from sphinx.util.typing import ExtensionMetadata

from sphinx.writers.html import HTMLTranslator


# Directive to insert color-toggle button
#
#   Example of the markup emitted by the HTML visitor functions below
#   for the element id "bengali-akhn-kssa":
#
#     <button class="svg-color-toggle-button" id="button-bengali-akhn-kssa" onclick='toggleColor("bengali-akhn-kssa")'>Toggle cluster colors</button>
#


class svg_color_toggle_node(nodes.General, nodes.Element):
    """Docutils node standing in for one SVG color-toggle button.

    The class adds no behavior of its own; it exists so that visitor
    functions can be registered for it.
    """


class SVGColorToggleButton(SphinxDirective):
    """Directive that inserts a color-toggle button for one SVG element.

    The directive takes exactly one required argument: the id of the
    SVG element that the button should control.
    """

    required_arguments = 1
    optional_arguments = 0
    has_content = True

    def run(self) -> list[svg_color_toggle_node]:
        """Build the single toggle node for the requested element id.

        Returns:
            A one-item list holding an ``svg_color_toggle_node`` whose
            attributes carry the target id, the derived button id, the
            button's CSS class, and the button label.
        """
        # The directive's only argument is the element id the button
        # is built for.
        target = self.arguments[0]

        button = svg_color_toggle_node(
            target_id=target,
            button_id="button-" + target,
            button_klass="svg-color-toggle-button",
            button_label="Toggle cluster colors",
        )
        return [button]


def visit_svg_color_toggle_node_html(translator: HTMLTranslator, node: svg_color_toggle_node) -> None:
    """Emit the opening ``<button>`` tag and label for a color-toggle node.

    Args:
        translator: HTML translator whose ``body`` list receives the markup.
        node: Toggle node; its ``target_id``, ``button_id``,
            ``button_klass`` and ``button_label`` entries are read.

    When ``target_id`` is falsy, an empty string is appended instead.
    """
    if not node["target_id"]:
        translator.body.append("")
        return

    # Literal markup alternates with node values; the onclick attribute
    # is single-quoted so the element id can sit inside double quotes.
    fragments = [
        '<button class="', node["button_klass"],
        '" id="', node["button_id"],
        '" onclick=\'toggleColor("', node["target_id"],
        '")\'>', node["button_label"],
    ]
    translator.body.append("".join(fragments))


def depart_svg_color_toggle_node_html(translator: HTMLTranslator, node: svg_color_toggle_node) -> None:
    """Emit the closing ``</button>`` tag for a color-toggle node.

    Args:
        translator: HTML translator whose ``body`` list receives the markup.
        node: Toggle node; only its ``target_id`` entry is read.

    When ``target_id`` is falsy, an empty string is appended instead.
    """
    closing_tag = '</button>' if node["target_id"] else ""
    translator.body.append(closing_tag)


def visit_svg_color_toggle_node_unsupported(translator: SphinxTranslator, node: svg_color_toggle_node) -> None:
    """Skip a color-toggle node in an output format that cannot render it.

    Args:
        translator: Translator of the active builder (not used).
        node: Toggle node; its ``target_id`` is named in the warning.

    Raises:
        nodes.SkipNode: Always, after the warning has been logged.
    """
    # This module imports Sphinx's logging helper but never creates a
    # module-level ``logger``, so the bare name used to raise NameError
    # here. Obtain the logger locally so the warning is emitted and the
    # node is skipped.
    logger = logging.getLogger(__name__)
    logger.warning(
        f"SVG color-toggle {node['target_id']}: unsupported output format (node skipped)"
    )
    raise nodes.SkipNode


def setup(app: Sphinx):
    """Register the color-toggle node and directive with Sphinx.

    The ``html`` builder gets the real visit/depart pair; the ``epub``,
    ``latex``, ``man``, ``texinfo`` and ``text`` builders get the
    "unsupported" visitor and no depart function.

    Returns:
        The extension metadata dict: version string plus the two
        parallel-safety flags.
    """
    # No return annotation: ExtensionMetadata is not available in the
    # version of Sphinx in use. The intended signature would be
    #   def setup(app: Sphinx) -> ExtensionMetadata:

    unsupported_visitors = (visit_svg_color_toggle_node_unsupported, None)
    unsupported_builders = ("epub", "latex", "man", "texinfo", "text")

    app.add_node(
        svg_color_toggle_node,
        html=(visit_svg_color_toggle_node_html, depart_svg_color_toggle_node_html),
        **{builder: unsupported_visitors for builder in unsupported_builders},
    )
    app.add_directive("svg-color-toggle-button", SVGColorToggleButton)

    metadata = {
        'version': '0.1',
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }
    return metadata


================================================
FILE: _global.md
================================================
```{role} togglebutton(raw)
:format: html
```

```{raw} html

<link rel="preload" href="/images/color-filters.svg" as="image"/>
```


================================================
FILE: _static/LICENSES_FOR_INCORPORATED_SOFTWARE.txt
================================================
This documentation set includes files originating from the following
upstream projects, which are each subject to their own individual
licenses and are copyrighted by their own respective authors.

These respective copyright statements and licenses are reproduced
below or, for those cases where the license is bundled as a separate
file, referenced by file name.


Sphinx
//     https://www.sphinx-doc.org/
//     
//     :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
//     :license: BSD, see LICENSE for details.
//
//     https://github.com/sphinx-doc/sphinx/blob/master/LICENSE
//
//      - License for Sphinx
//        ==================
//        
//        Unless otherwise indicated, all code in the Sphinx project is licenced under the
//        two clause BSD licence below.
//        
//        Copyright (c) 2007-2023 by the Sphinx team (see AUTHORS file).
//        All rights reserved.
//        
//        Redistribution and use in source and binary forms, with or without
//        modification, are permitted provided that the following conditions are
//        met:
//        
//        * Redistributions of source code must retain the above copyright
//          notice, this list of conditions and the following disclaimer.
//        
//        * Redistributions in binary form must reproduce the above copyright
//          notice, this list of conditions and the following disclaimer in the
//          documentation and/or other materials provided with the distribution.
//        
//        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
//        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
//        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
//        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
//        HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
//        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
//        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
//        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
//        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
//        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
//        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//        
//        
//        Licenses for incorporated software
//        ==================================
//        
//        The included implementation of NumpyDocstring._parse_numpydoc_see_also_section
//        was derived from code under the following license:
//        
//        -------------------------------------------------------------------------------
//        
//        Copyright (C) 2008 Stefan van der Walt <stefan@mentat.za.net>, Pauli Virtanen <pav@iki.fi>
//        
//        Redistribution and use in source and binary forms, with or without
//        modification, are permitted provided that the following conditions are
//        met:
//        
//         1. Redistributions of source code must retain the above copyright
//            notice, this list of conditions and the following disclaimer.
//         2. Redistributions in binary form must reproduce the above copyright
//            notice, this list of conditions and the following disclaimer in
//            the documentation and/or other materials provided with the
//            distribution.
//        
//        THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
//        IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
//        WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
//        DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
//        INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
//        (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
//        SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
//        HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
//        STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
//        IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
//        POSSIBILITY OF SUCH DAMAGE.
//        
//        -------------------------------------------------------------------------------

./_build/html/_static/searchtools.js
./_build/html/_static/language_data.js
./_build/html/_static/sphinx_highlight.js
./_build/html/_static/basic.css
./_build/html/_static/doctools.js
./_build/html/_static/documentation_options.js
./_build/html/_static/_sphinx_javascript_frameworks_compat.js
./_build/html/objects.inv
./_build/html/searchindex.js


Sphinx "Alabaster" theme
//     https://github.com/sphinx-doc/alabaster
//     Copyright (c) 2020 Jeff Forcier.
//     
//     https://github.com/sphinx-doc/alabaster/blob/0.x/LICENSE
//     
//      - Based on original work copyright (c) 2011 Kenneth Reitz and
//        copyright (c) 2010 Armin Ronacher.
//        
//        Some rights reserved.
//        
//        Redistribution and use in source and binary forms of the theme, with or
//        without modification, are permitted provided that the following conditions
//        are met:
//        
//        * Redistributions of source code must retain the above copyright
//          notice, this list of conditions and the following disclaimer.
//        
//        * Redistributions in binary form must reproduce the above
//          copyright notice, this list of conditions and the following
//          disclaimer in the documentation and/or other materials provided
//          with the distribution.
//        
//        * The names of the contributors may not be used to endorse or
//          promote products derived from this software without specific
//          prior written permission.
//        
//        THIS THEME IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
//        AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
//        IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
//        ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
//        LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
//        CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
//        SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
//        INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
//        CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
//        ARISING IN ANY WAY OUT OF THE USE OF THIS THEME, EVEN IF ADVISED OF THE
//        POSSIBILITY OF SUCH DAMAGE.

./_build/html/_static/alabaster.css


jQuery JavaScript Library v3.6.0
//     https://jquery.com/
//     
//     Copyright OpenJS Foundation and other contributors
//     Released under the MIT license
//     https://jquery.org/license
//     
//     Date: 2021-03-02T17:08Z
//
//     https://github.com/jquery/jquery/blob/3.6-stable/LICENSE.txt
//
//      - Copyright OpenJS Foundation and other contributors, https://openjsf.org/
//        
//        Permission is hereby granted, free of charge, to any person obtaining
//        a copy of this software and associated documentation files (the
//        "Software"), to deal in the Software without restriction, including
//        without limitation the rights to use, copy, modify, merge, publish,
//        distribute, sublicense, and/or sell copies of the Software, and to
//        permit persons to whom the Software is furnished to do so, subject to
//        the following conditions:
//        
//        The above copyright notice and this permission notice shall be
//        included in all copies or substantial portions of the Software.
//        
//        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
//        EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
//        MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
//        NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
//        LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
//        OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
//        WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//
//     Includes Sizzle.js
//     https://sizzlejs.com/
//     Sizzle CSS Selector Engine v2.3.6
//     https://sizzlejs.com/
//     
//     Copyright JS Foundation and other contributors
//     Released under the MIT license
//     https://js.foundation/
//     
//     Date: 2021-02-16
//
//     https://github.com/jquery/sizzle/blob/main/LICENSE.txt
//
//      - Copyright JS Foundation and other contributors, https://js.foundation/
//        
//        This software consists of voluntary contributions made by many
//        individuals. For exact contribution history, see the revision history
//        available at https://github.com/jquery/sizzle
//        
//        The following license applies to all parts of this software except as
//        documented below:
//        
//        ====
//        
//        Permission is hereby granted, free of charge, to any person obtaining
//        a copy of this software and associated documentation files (the
//        "Software"), to deal in the Software without restriction, including
//        without limitation the rights to use, copy, modify, merge, publish,
//        distribute, sublicense, and/or sell copies of the Software, and to
//        permit persons to whom the Software is furnished to do so, subject to
//        the following conditions:
//        
//        The above copyright notice and this permission notice shall be
//        included in all copies or substantial portions of the Software.
//        
//        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
//        EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
//        MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
//        NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
//        LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
//        OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
//        WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//        
//        ====
//        
//        All files located in the node_modules and external directories are
//        externally maintained libraries used by this software which have their
//        own licenses; we recommend you read them, as their terms may differ from
//        the terms above.

./_build/html/_static/jquery-3.6.0.js: MIT License
./_build/html/_static/jquery.js


Pygments
//     https://pygments.org/
//
//     Copyright (c) 2006-2022 by the respective authors (see AUTHORS file).
//      - https://github.com/pygments/pygments/blob/master/AUTHORS
//     
//     https://github.com/pygments/pygments/blob/master/LICENSE
//
//      - Copyright (c) 2006-2022 by the respective authors (see AUTHORS file).
//        All rights reserved.
//        
//        Redistribution and use in source and binary forms, with or without
//        modification, are permitted provided that the following conditions are
//        met:
//        
//        * Redistributions of source code must retain the above copyright
//          notice, this list of conditions and the following disclaimer.
//        
//        * Redistributions in binary form must reproduce the above copyright
//          notice, this list of conditions and the following disclaimer in the
//          documentation and/or other materials provided with the distribution.
//        
//        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
//        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
//        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
//        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
//        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
//        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
//        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
//        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
//        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
//        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
//        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

./_build/html/_static/pygments.css


Underscore.js 1.13.1
//     https://underscorejs.org
//
//     (c) 2009-2021 Jeremy Ashkenas, Julian Gonggrijp, and
//     DocumentCloud and Investigative Reporters & Editors 
//     Underscore may be freely distributed under the MIT license.
//
//     https://github.com/jashkenas/underscore/blob/master/LICENSE
//
//      - Copyright (c) 2009-2022 Jeremy Ashkenas, Julian Gonggrijp,
//        and DocumentCloud and Investigative Reporters & Editors 
//        
//        Permission is hereby granted, free of charge, to any person
//        obtaining a copy of this software and associated documentation
//        files (the "Software"), to deal in the Software without
//        restriction, including without limitation the rights to use,
//        copy, modify, merge, publish, distribute, sublicense, and/or sell
//        copies of the Software, and to permit persons to whom the
//        Software is furnished to do so, subject to the following
//        conditions:
//        
//        The above copyright notice and this permission notice shall be
//        included in all copies or substantial portions of the Software.
//        
//        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
//        EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
//        OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
//        NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
//        HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
//        WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
//        FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
//        OTHER DEALINGS IN THE SOFTWARE.

./_build/html/_static/underscore-1.13.1.js: MIT License
./_build/html/_static/underscore.js: MIT License


Source Code Pro
//     https://github.com/adobe-fonts/source-code-pro
//
//     Copyright 2010, 2012 Adobe Systems Incorporated
//     (http://www.adobe.com/), with Reserved Font Name 'Source'. All Rights
//     Reserved. Source is a trademark of Adobe Systems Incorporated in the
//     United States and/or other countries. 
//     
//     This Font Software is licensed under the SIL Open Font License, Version 1.1.

./_build/html/_static/fonts/Source_Code_Pro/OFL.txt: SIL Open Font License 1.1
./_build/html/_static/fonts/Source_Code_Pro/README.txt
./_build/html/_static/fonts/Source_Code_Pro/SourceCodePro-Italic-VariableFont_wght.ttf
./_build/html/_static/fonts/Source_Code_Pro/SourceCodePro-VariableFont_wght.ttf


Source Sans 3
//     https://github.com/adobe-fonts/source-sans
//
//     Copyright 2010-2020 Adobe (http://www.adobe.com/), with
//     Reserved Font Name 'Source'. All Rights Reserved. Source is a
//     trademark of Adobe in the United States and/or other countries. 
//     
//     This Font Software is licensed under the SIL Open Font License, Version 1.1.

./_build/html/_static/fonts/Source_Sans_3/OFL.txt: SIL Open Font License 1.1
./_build/html/_static/fonts/Source_Sans_3/README.txt
./_build/html/_static/fonts/Source_Sans_3/SourceSans3-Italic-VariableFont_wght.ttf
./_build/html/_static/fonts/Source_Sans_3/SourceSans3-VariableFont_wght.ttf


Source Serif 4
//     https://github.com/adobe-fonts/source-serif
//
//     https://github.com/adobe-fonts/source-serif/blob/release/LICENSE.md
//
//      - Copyright 2014 - 2023 Adobe (http://www.adobe.com/), with
//        Reserved Font Name ‘Source’. All Rights Reserved. Source is a
//        trademark of Adobe in the United States and/or other countries. 
//        
//        This Font Software is licensed under the SIL Open Font License, Version 1.1.

./_build/html/_static/fonts/Source_Serif_4/OFL.txt: SIL Open Font License 1.1
./_build/html/_static/fonts/Source_Serif_4/README.txt
./_build/html/_static/fonts/Source_Serif_4/SourceSerif4-Italic-VariableFont_opsz,wght.ttf
./_build/html/_static/fonts/Source_Serif_4/SourceSerif4-VariableFont_opsz,wght.ttf


================================================
FILE: _static/custom.css
================================================

/* basic.css | file:///home/nate/code/opentype-shaping-documents/_build/html/_static/basic.css */

div.body {
  /* Widen the minimum width of the main text column.              */
  /* The commented-out declarations are presumably the values that */
  /* were in effect before this override -- TODO confirm.          */
  /* min-width: 360px; */
  /* max-width: 800px; */
  min-width: 460px;
  max-width: 800px;
}

/* alabaster.css | file:///home/nate/code/opentype-shaping-documents/_build/html/_static/alabaster.css */

/* Disabled override, kept for reference.
   CSS comments do not nest, so the rule is held in one flat comment.
   Earlier value: width: 940px

div.document {
    width: 1040px;
}
*/

/* Disabled override, kept for reference.
   Earlier value: width: 220px

div.sphinxsidebar {
    width: 240px;
}
*/

div.bodywrapper {
  /* Left margin of the body wrapper; the commented-out value is   */
  /* presumably the one being replaced -- TODO confirm.            */
  /* margin: 0 0 0 220px; */
  margin: 0 0 0 300px;
}


/* Hanging section numbers, for slightly easier in-page navigation. */

/* Indent each top-level section of the body by 4rem on the left,
   then pull the section-number box leftward into that indent. */
div.body>section {
    margin-left: 4rem;
}


/* The number is laid out as a fixed-width, right-aligned inline box.
   Horizontal budget: -4.5rem (margin-left) + 3.5rem (width)
   + .5rem (margin-right) = -.5rem, so the text following the number
   starts .5rem to the left of where the number box would otherwise
   have begun.
   NOTE(review): if the intent is for heading text to align exactly
   with the indented body text, the margins would need to sum to 0;
   confirm against the rendered output. */
span.section-number {
    display: inline-block;
    width: 3.5rem;
    text-align: right;
    margin-left: -4.5rem;
    margin-right: .5rem;
}
/* Disabled: appended a single preserved space after the number.
span.section-number::after {
    content: " ";
    white-space: pre;
}*/

/* Locally served fonts, to support more smartfont features.          */
/*                                                                    */
/* Each family is registered twice (italic, then upright) from its    */
/* variable-font file, and every face declares the weight range       */
/* 1-999 so that intermediate weights can be requested elsewhere in   */
/* this stylesheet.                                                   */

/* --- Source Serif 4 --- */
@font-face {
    font-family: 'Source Serif 4';
    font-style: italic;
    font-weight: 1 999;
    src: url('./fonts/Source_Serif_4/SourceSerif4-Italic-VariableFont_opsz,wght.ttf') format('truetype-variations');
}

@font-face {
    font-family: 'Source Serif 4';
    font-style: normal;
    font-weight: 1 999;
    src: url('./fonts/Source_Serif_4/SourceSerif4-VariableFont_opsz,wght.ttf') format('truetype-variations');
}

/* --- Source Sans 3 --- */
/* Scaled to 94% (original author's note: "47% of x-height originally"). */
@font-face {
    font-family: 'Source Sans 3';
    font-style: italic;
    font-weight: 1 999;
    size-adjust: 94%;
    src: url('./fonts/Source_Sans_3/SourceSans3-Italic-VariableFont_wght.ttf') format('truetype-variations');
}

@font-face {
    font-family: 'Source Sans 3';
    font-style: normal;
    font-weight: 1 999;
    size-adjust: 94%;
    src: url('./fonts/Source_Sans_3/SourceSans3-VariableFont_wght.ttf') format('truetype-variations');
}

/* --- Source Code Pro --- */
/* Scaled to 92% (original author's note: "46% of x-height originally"). */
@font-face {
    font-family: 'Source Code Pro';
    font-style: italic;
    font-weight: 1 999;
    size-adjust: 92%;
    src: url('./fonts/Source_Code_Pro/SourceCodePro-Italic-VariableFont_wght.ttf') format('truetype-variations');
}

@font-face {
    font-family: 'Source Code Pro';
    font-style: normal;
    font-weight: 1 999;
    size-adjust: 92%;
    src: url('./fonts/Source_Code_Pro/SourceCodePro-VariableFont_wght.ttf') format('truetype-variations');
}

/* Use oldstyle numerals in body text.                               */
/* "onum" is the OpenType feature tag being enabled. Any rule that   */
/* sets font-feature-settings on a descendant replaces this value    */
/* rather than adding to it, which is why the table and code rules   */
/* in this file list their complete feature set.                     */
body {
    font-feature-settings: "onum";
}


/* Tables                 */
/* Alternating row background colors, like GitHub inline styling uses. */
tr:nth-child(even) { background: #F0F0F4; }
tr:nth-child(odd) { background: #FFF; }

/* Less obtrusive headers. */
/* The generic sans-serif fallback keeps header cells in a sans face */
/* if the 'Source Sans 3' webfont cannot be loaded; without it the   */
/* browser would fall back to its default font.                      */
th {
    font-family: 'Source Sans 3', sans-serif;
    font-size-adjust: 0.48;
    font-weight: 600;
    background: #F0F0F4;
    font-feature-settings: "tnum", "lnum";
}

/* Tabular, lining numerals in all table rows. */
tr {
    font-feature-settings: "tnum", "lnum";
}

/* Sidebar toctree: enlarge the caption text ... */
p.caption[role="heading"] span.caption-text {
    font-size: 120%;
}

/* ... and indent the list that immediately follows each caption. */
p.caption[role="heading"] + ul {
    text-indent: 0.6em;
}

/* Make <abbr> acronyms small-caps, but not interactive tooltips */
/* The font-size adjustment here may be temporary; it depends on */
/* the smallcap height of the font eventually specified.         */
/* The default cursor and the removed bottom border take away    */
/* the usual visual hints that an <abbr> can be hovered.         */
abbr {
  font-variant: all-small-caps;
  font-size: larger;
  font-weight: 375;
  cursor: default;
  border-bottom: none;
}

/* Style 'samp' elements used to indicate explicit sequences and */
/* input/output character references that must be exact.         */
/* The generic sans-serif fallback keeps these in a sans face if */
/* the 'Source Sans 3' webfont cannot be loaded.                 */
samp {
    font-family: 'Source Sans 3', sans-serif;
    font-size-adjust: 0.48;
    font-weight: 500; /* slightly heavier only */
    color: #558;
}

/* De-emphasize Sphinx's section numbering, so as to be less */
/* distracting.                                              */
/*                                                           */
/* Positioning of the numbers is handled separately by the   */
/* span.section-number rule earlier in this file; this rule  */
/* only adjusts size and color.                              */
/* NOTE(review): the earlier "shift numbers into margin"     */
/* TODO appears to be covered by that rule -- confirm.       */
.section-number {
    font-size: 70%;
    color: #888;
}

/* Source family fonts: give every heading level in the body a */
/* medium (500) weight.                                        */
div.body h1, div.body h2, div.body h3,
div.body h4, div.body h5, div.body h6 {
    font-weight: 500;
}

/* Make the site-title in the sidebar bolder and different since */
/* there is no project logo.                                     */
/* The generic sans-serif fallback keeps the title in a sans     */
/* face if the 'Source Sans 3' webfont cannot be loaded.         */
h1.logo {
    font-family: 'Source Sans 3', sans-serif;
    font-weight: 800;
    font-size: 32px;
    border-bottom: none;
    text-decoration: none;
}

/* Fix the Alabaster theme's default font-scaling since we are */
/* using a complete superfamily with consistent sizing.        */
/* Code text also gets tabular, lining numerals; the full      */
/* feature list is given here because this value replaces the  */
/* "onum" setting on body rather than extending it.            */
pre, tt, code {
    font-size: 1.0em; /* Undo Alabaster setting, best balance for the 3 element types considering Alabaster's other style rules */
    font-weight: 450;
    font-size-adjust: 0.46;
    font-feature-settings: "tnum", "lnum";
}

/* Table captions and figure captions share the same look: */
/* grey text with a little space above.                    */
caption,
figcaption {
    padding-top: 6px;
    color: #656565;
}

/* Table captions sit below the table. */
caption {
    caption-side: bottom;
    padding-bottom: 6px;
}

/* Figure captions give up their bottom padding (and pull the */
/* following content upward) to account for toggle-button     */
/* placement.                                                 */
figcaption {
    padding-bottom: 0px;
    margin-bottom: -12px;
}

figcaption p {
    margin-top: 8.5px;
    margin-bottom: 8.5px;
}

/* Inline code inside captions gets a slightly lighter background, */
/* because the caption text color is grey rather than black.       */
caption pre, caption span.pre, caption span.tt, caption span.code,
figcaption pre, figcaption span.pre, figcaption span.tt, figcaption span.code {
    background-color: #eff4f7; /* testing; needs to be lighter because text is grey */
}

/* Set maximum size of SVG illustrations   */
/*                                         */
/* Width is currently limited to 100% of   */
/* the parent element; height is currently */
/* specified relative to the text size,    */
/* for presumptive convenience. 18em is a  */
/* trial-and-error value that seems to be  */
/* reasonable, but is by no means science. */
/*                                         */
/* Both limits are maximums only: smaller  */
/* illustrations are not scaled up by this */
/* rule.                                   */
svg.shaping-demo {
    max-width: 100%;
    max-height: 18em;
}


/* Toggleable SVG clusters */

/* Greyscale palette.                                             */
/* Applied to elements inside an element carrying both the        */
/* "shaping-demo" and "greyscale-svg" classes.                    */

/* dc: dotted-circle */
.shaping-demo.greyscale-svg .dc {
    fill: #999999;
    stroke-width: 0%;
}

/* arrow: right-arrow */
.shaping-demo.greyscale-svg .arrow {
    fill: #666666;
    stroke-width: 0%;
}

/* z: ZWJ and ZWNJ */
.shaping-demo.greyscale-svg .z {
    fill: #999999;
    stroke: #999999;
    stroke-width: 1px;
}

/* c0 through c9: clusters 0-9, all rendered in black */
.shaping-demo.greyscale-svg .c0,
.shaping-demo.greyscale-svg .c1,
.shaping-demo.greyscale-svg .c2,
.shaping-demo.greyscale-svg .c3,
.shaping-demo.greyscale-svg .c4,
.shaping-demo.greyscale-svg .c5,
.shaping-demo.greyscale-svg .c6,
.shaping-demo.greyscale-svg .c7,
.shaping-demo.greyscale-svg .c8,
.shaping-demo.greyscale-svg .c9 {
    fill: #000000;
    stroke-width: 0%;
}

/* Colorized palette.                                             */
/* Applied to elements inside an element carrying both the        */
/* "shaping-demo" and "color-svg" classes.                        */

/* Everything except the ZWJ/ZWNJ marker is drawn without a stroke. */
.shaping-demo.color-svg .dc,
.shaping-demo.color-svg .arrow,
.shaping-demo.color-svg .c0,
.shaping-demo.color-svg .c1,
.shaping-demo.color-svg .c2,
.shaping-demo.color-svg .c3,
.shaping-demo.color-svg .c4,
.shaping-demo.color-svg .c5,
.shaping-demo.color-svg .c6,
.shaping-demo.color-svg .c7,
.shaping-demo.color-svg .c8,
.shaping-demo.color-svg .c9 {
    stroke-width: 0%;
}

/* dc: dotted-circle */
.shaping-demo.color-svg .dc    { fill: #999999; }
/* arrow: right-arrow */
.shaping-demo.color-svg .arrow { fill: #666666; }

/* z: ZWJ and ZWNJ */
.shaping-demo.color-svg .z {
    fill: #999999;
    stroke: #999999;
    stroke-width: 1px;
}

/* One fill color per cluster. */
/* c0: cluster 0 */
.shaping-demo.color-svg .c0 { fill: #3366cc; }
/* c1: cluster 1 */
.shaping-demo.color-svg .c1 { fill: #dc3912; }
/* c2: cluster 2 */
.shaping-demo.color-svg .c2 { fill: #ff9900; }
/* c3: cluster 3 */
.shaping-demo.color-svg .c3 { fill: #109618; }
/* c4: cluster 4 */
.shaping-demo.color-svg .c4 { fill: #990099; }
/* c5: cluster 5 */
.shaping-demo.color-svg .c5 { fill: #0099c6; }
/* c6: cluster 6 */
.shaping-demo.color-svg .c6 { fill: #dd4477; }
/* c7: cluster 7 */
.shaping-demo.color-svg .c7 { fill: #66aa00; }
/* c8: cluster 8 */
.shaping-demo.color-svg .c8 { fill: #b82e2e; }
/* c9: cluster 9 */
.shaping-demo.color-svg .c9 { fill: #316395; }

/* Color/greyscale toggle button: a small, centered, low-contrast */
/* outline button on a fully transparent background.              */
button.svg-color-toggle-button {
    /* Block-level with auto side margins centers the button. */
    display: block;
    margin-left: auto;
    margin-right: auto;
    /* margin-top: -8.5px; This makes alignment overly complicated.... */
    padding: 4px 16px 5px;
    font-size: small;
    color: #999;
    background-color: #fff0;
    border: 1px solid #bbb;
    border-radius: 3px;
}

/* Add space above block quotations. */
blockquote {
    margin-top: 17px;
}

/* Static navigation sidebar */
/* Turn off bullet point on heading items */
/* (an empty string is used as the list marker) */
li.static-nav-heading {
    list-style: "";
}

/* L1: first-level entries are enlarged */
li.toctree-l1.static-nav {
    font-size: 120%;
}
/* L2 headings: second-level list items are italic ... */
li.toctree-l2.static-nav {
    font-size: 100%;
    font-style: italic;
}
/* L2 page links: ... but links inside them are reset to upright, */
/* so only non-link text in a second-level item stays italic.     */
li.toctree-l2.static-nav a {
    font-size: 100%;
    font-style: normal;
}


================================================
FILE: _static/fonts/Source_Code_Pro/OFL.txt
================================================
Copyright 2010, 2012 Adobe Systems Incorporated (http://www.adobe.com/), with Reserved Font Name 'Source'. All Rights Reserved. Source is a trademark of Adobe Systems Incorporated in the United States and/or other countries.

This Font Software is licensed under the SIL Open Font License, Version 1.1.
This license is copied below, and is also available with a FAQ at:
http://scripts.sil.org/OFL


-----------------------------------------------------------
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
-----------------------------------------------------------

PREAMBLE
The goals of the Open Font License (OFL) are to stimulate worldwide
development of collaborative font projects, to support the font creation
efforts of academic and linguistic communities, and to provide a free and
open framework in which fonts may be shared and improved in partnership
with others.

The OFL allows the licensed fonts to be used, studied, modified and
redistributed freely as long as they are not sold by themselves. The
fonts, including any derivative works, can be bundled, embedded, 
redistributed and/or sold with any software provided that any reserved
names are not used by derivative works. The fonts and derivatives,
however, cannot be released under any other type of license. The
requirement for fonts to remain under this license does not apply
to any document created using the fonts or their derivatives.

DEFINITIONS
"Font Software" refers to the set of files released by the Copyright
Holder(s) under this license and clearly marked as such. This may
include source files, build scripts and documentation.

"Reserved Font Name" refers to any names specified as such after the
copyright statement(s).

"Original Version" refers to the collection of Font Software components as
distributed by the Copyright Holder(s).

"Modified Version" refers to any derivative made by adding to, deleting,
or substituting -- in part or in whole -- any of the components of the
Original Version, by changing formats or by porting the Font Software to a
new environment.

"Author" refers to any designer, engineer, programmer, technical
writer or other person who contributed to the Font Software.

PERMISSION & CONDITIONS
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Font Software, to use, study, copy, merge, embed, modify,
redistribute, and sell modified and unmodified copies of the Font
Software, subject to the following conditions:

1) Neither the Font Software nor any of its individual components,
in Original or Modified Versions, may be sold by itself.

2) Original or Modified Versions of the Font Software may be bundled,
redistributed and/or sold with any software, provided that each copy
contains the above copyright notice and this license. These can be
included either as stand-alone text files, human-readable headers or
in the appropriate machine-readable metadata fields within text or
binary files as long as those fields can be easily viewed by the user.

3) No Modified Version of the Font Software may use the Reserved Font
Name(s) unless explicit written permission is granted by the corresponding
Copyright Holder. This restriction only applies to the primary font name as
presented to the users.

4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
Software shall not be used to promote, endorse or advertise any
Modified Version, except to acknowledge the contribution(s) of the
Copyright Holder(s) and the Author(s) or with their explicit written
permission.

5) The Font Software, modified or unmodified, in part or in whole,
must be distributed entirely under this license, and must not be
distributed under any other license. The requirement for fonts to
remain under this license does not apply to any document created
using the Font Software.

TERMINATION
This license becomes null and void if any of the above conditions are
not met.

DISCLAIMER
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.


================================================
FILE: _static/fonts/Source_Code_Pro/README.txt
================================================
Source Code Pro Variable Font
=============================

This download contains Source Code Pro as both variable fonts and static fonts.

Source Code Pro is a variable font with this axis:
  wght

This means all the styles are contained in these files:
  SourceCodePro-VariableFont_wght.ttf
  SourceCodePro-Italic-VariableFont_wght.ttf

If your app fully supports variable fonts, you can now pick intermediate styles
that aren’t available as static fonts. Not all apps support variable fonts, and
in those cases you can use the static font files for Source Code Pro:
  static/SourceCodePro-ExtraLight.ttf
  static/SourceCodePro-Light.ttf
  static/SourceCodePro-Regular.ttf
  static/SourceCodePro-Medium.ttf
  static/SourceCodePro-SemiBold.ttf
  static/SourceCodePro-Bold.ttf
  static/SourceCodePro-ExtraBold.ttf
  static/SourceCodePro-Black.ttf
  static/SourceCodePro-ExtraLightItalic.ttf
  static/SourceCodePro-LightItalic.ttf
  static/SourceCodePro-Italic.ttf
  static/SourceCodePro-MediumItalic.ttf
  static/SourceCodePro-SemiBoldItalic.ttf
  static/SourceCodePro-BoldItalic.ttf
  static/SourceCodePro-ExtraBoldItalic.ttf
  static/SourceCodePro-BlackItalic.ttf

Get started
-----------

1. Install the font files you want to use

2. Use your app's font picker to view the font family and all the
available styles

Learn more about variable fonts
-------------------------------

  https://developers.google.com/web/fundamentals/design-and-ux/typography/variable-fonts
  https://variablefonts.typenetwork.com
  https://medium.com/variable-fonts

In desktop apps

  https://theblog.adobe.com/can-variable-fonts-illustrator-cc
  https://helpx.adobe.com/nz/photoshop/using/fonts.html#variable_fonts

Online

  https://developers.google.com/fonts/docs/getting_started
  https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Fonts/Variable_Fonts_Guide
  https://developer.microsoft.com/en-us/microsoft-edge/testdrive/demos/variable-fonts

Installing fonts

  MacOS: https://support.apple.com/en-us/HT201749
  Linux: https://www.google.com/search?q=how+to+install+a+font+on+gnu%2Blinux
  Windows: https://support.microsoft.com/en-us/help/314960/how-to-install-or-remove-a-font-in-windows

Android Apps

  https://developers.google.com/fonts/docs/android
  https://developer.android.com/guide/topics/ui/look-and-feel/downloadable-fonts

License
-------
Please read the full license text (OFL.txt) to understand the permissions,
restrictions and requirements for usage, redistribution, and modification.

You can use them in your products & projects – print or digital,
commercial or otherwise.

This isn't legal advice; please consider consulting a lawyer, and see the full
license for all details.


================================================
FILE: _static/fonts/Source_Sans_3/OFL.txt
================================================
Copyright 2010-2020 Adobe (http://www.adobe.com/), with Reserved Font Name 'Source'. All Rights Reserved. Source is a trademark of Adobe in the United States and/or other countries.

This Font Software is licensed under the SIL Open Font License, Version 1.1.
This license is copied below, and is also available with a FAQ at:
http://scripts.sil.org/OFL


-----------------------------------------------------------
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
-----------------------------------------------------------

PREAMBLE
The goals of the Open Font License (OFL) are to stimulate worldwide
development of collaborative font projects, to support the font creation
efforts of academic and linguistic communities, and to provide a free and
open framework in which fonts may be shared and improved in partnership
with others.

The OFL allows the licensed fonts to be used, studied, modified and
redistributed freely as long as they are not sold by themselves. The
fonts, including any derivative works, can be bundled, embedded, 
redistributed and/or sold with any software provided that any reserved
names are not used by derivative works. The fonts and derivatives,
however, cannot be released under any other type of license. The
requirement for fonts to remain under this license does not apply
to any document created using the fonts or their derivatives.

DEFINITIONS
"Font Software" refers to the set of files released by the Copyright
Holder(s) under this license and clearly marked as such. This may
include source files, build scripts and documentation.

"Reserved Font Name" refers to any names specified as such after the
copyright statement(s).

"Original Version" refers to the collection of Font Software components as
distributed by the Copyright Holder(s).

"Modified Version" refers to any derivative made by adding to, deleting,
or substituting -- in part or in whole -- any of the components of the
Original Version, by changing formats or by porting the Font Software to a
new environment.

"Author" refers to any designer, engineer, programmer, technical
writer or other person who contributed to the Font Software.

PERMISSION & CONDITIONS
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Font Software, to use, study, copy, merge, embed, modify,
redistribute, and sell modified and unmodified copies of the Font
Software, subject to the following conditions:

1) Neither the Font Software nor any of its individual components,
in Original or Modified Versions, may be sold by itself.

2) Original or Modified Versions of the Font Software may be bundled,
redistributed and/or sold with any software, provided that each copy
contains the above copyright notice and this license. These can be
included either as stand-alone text files, human-readable headers or
in the appropriate machine-readable metadata fields within text or
binary files as long as those fields can be easily viewed by the user.

3) No Modified Version of the Font Software may use the Reserved Font
Name(s) unless explicit written permission is granted by the corresponding
Copyright Holder. This restriction only applies to the primary font name as
presented to the users.

4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
Software shall not be used to promote, endorse or advertise any
Modified Version, except to acknowledge the contribution(s) of the
Copyright Holder(s) and the Author(s) or with their explicit written
permission.

5) The Font Software, modified or unmodified, in part or in whole,
must be distributed entirely under this license, and must not be
distributed under any other license. The requirement for fonts to
remain under this license does not apply to any document created
using the Font Software.

TERMINATION
This license becomes null and void if any of the above conditions are
not met.

DISCLAIMER
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.


================================================
FILE: _static/fonts/Source_Sans_3/README.txt
================================================
Source Sans 3 Variable Font
===========================

This download contains Source Sans 3 as both variable fonts and static fonts.

Source Sans 3 is a variable font with this axis:
  wght

This means all the styles are contained in these files:
  SourceSans3-VariableFont_wght.ttf
  SourceSans3-Italic-VariableFont_wght.ttf

If your app fully supports variable fonts, you can now pick intermediate styles
that aren’t available as static fonts. Not all apps support variable fonts, and
in those cases you can use the static font files for Source Sans 3:
  static/SourceSans3-ExtraLight.ttf
  static/SourceSans3-Light.ttf
  static/SourceSans3-Regular.ttf
  static/SourceSans3-Medium.ttf
  static/SourceSans3-SemiBold.ttf
  static/SourceSans3-Bold.ttf
  static/SourceSans3-ExtraBold.ttf
  static/SourceSans3-Black.ttf
  static/SourceSans3-ExtraLightItalic.ttf
  static/SourceSans3-LightItalic.ttf
  static/SourceSans3-Italic.ttf
  static/SourceSans3-MediumItalic.ttf
  static/SourceSans3-SemiBoldItalic.ttf
  static/SourceSans3-BoldItalic.ttf
  static/SourceSans3-ExtraBoldItalic.ttf
  static/SourceSans3-BlackItalic.ttf

Get started
-----------

1. Install the font files you want to use

2. Use your app's font picker to view the font family and all the
available styles

Learn more about variable fonts
-------------------------------

  https://developers.google.com/web/fundamentals/design-and-ux/typography/variable-fonts
  https://variablefonts.typenetwork.com
  https://medium.com/variable-fonts

In desktop apps

  https://theblog.adobe.com/can-variable-fonts-illustrator-cc
  https://helpx.adobe.com/nz/photoshop/using/fonts.html#variable_fonts

Online

  https://developers.google.com/fonts/docs/getting_started
  https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Fonts/Variable_Fonts_Guide
  https://developer.microsoft.com/en-us/microsoft-edge/testdrive/demos/variable-fonts

Installing fonts

  MacOS: https://support.apple.com/en-us/HT201749
  Linux: https://www.google.com/search?q=how+to+install+a+font+on+gnu%2Blinux
  Windows: https://support.microsoft.com/en-us/help/314960/how-to-install-or-remove-a-font-in-windows

Android Apps

  https://developers.google.com/fonts/docs/android
  https://developer.android.com/guide/topics/ui/look-and-feel/downloadable-fonts

License
-------
Please read the full license text (OFL.txt) to understand the permissions,
restrictions and requirements for usage, redistribution, and modification.

You can use them in your products & projects – print or digital,
commercial or otherwise.

This isn't legal advice; please consider consulting a lawyer, and see the full
license for all details.


================================================
FILE: _static/fonts/Source_Serif_4/OFL.txt
================================================
This Font Software is licensed under the SIL Open Font License, Version 1.1.
This license is copied below, and is also available with a FAQ at:
http://scripts.sil.org/OFL


-----------------------------------------------------------
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
-----------------------------------------------------------

PREAMBLE
The goals of the Open Font License (OFL) are to stimulate worldwide
development of collaborative font projects, to support the font creation
efforts of academic and linguistic communities, and to provide a free and
open framework in which fonts may be shared and improved in partnership
with others.

The OFL allows the licensed fonts to be used, studied, modified and
redistributed freely as long as they are not sold by themselves. The
fonts, including any derivative works, can be bundled, embedded, 
redistributed and/or sold with any software provided that any reserved
names are not used by derivative works. The fonts and derivatives,
however, cannot be released under any other type of license. The
requirement for fonts to remain under this license does not apply
to any document created using the fonts or their derivatives.

DEFINITIONS
"Font Software" refers to the set of files released by the Copyright
Holder(s) under this license and clearly marked as such. This may
include source files, build scripts and documentation.

"Reserved Font Name" refers to any names specified as such after the
copyright statement(s).

"Original Version" refers to the collection of Font Software components as
distributed by the Copyright Holder(s).

"Modified Version" refers to any derivative made by adding to, deleting,
or substituting -- in part or in whole -- any of the components of the
Original Version, by changing formats or by porting the Font Software to a
new environment.

"Author" refers to any designer, engineer, programmer, technical
writer or other person who contributed to the Font Software.

PERMISSION & CONDITIONS
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Font Software, to use, study, copy, merge, embed, modify,
redistribute, and sell modified and unmodified copies of the Font
Software, subject to the following conditions:

1) Neither the Font Software nor any of its individual components,
in Original or Modified Versions, may be sold by itself.

2) Original or Modified Versions of the Font Software may be bundled,
redistributed and/or sold with any software, provided that each copy
contains the above copyright notice and this license. These can be
included either as stand-alone text files, human-readable headers or
in the appropriate machine-readable metadata fields within text or
binary files as long as those fields can be easily viewed by the user.

3) No Modified Version of the Font Software may use the Reserved Font
Name(s) unless explicit written permission is granted by the corresponding
Copyright Holder. This restriction only applies to the primary font name as
presented to the users.

4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
Software shall not be used to promote, endorse or advertise any
Modified Version, except to acknowledge the contribution(s) of the
Copyright Holder(s) and the Author(s) or with their explicit written
permission.

5) The Font Software, modified or unmodified, in part or in whole,
must be distributed entirely under this license, and must not be
distributed under any other license. The requirement for fonts to
remain under this license does not apply to any document created
using the Font Software.

TERMINATION
This license becomes null and void if any of the above conditions are
not met.

DISCLAIMER
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.


================================================
FILE: _static/fonts/Source_Serif_4/README.txt
================================================
Source Serif 4 Variable Font
============================

This download contains Source Serif 4 as both variable fonts and static fonts.

Source Serif 4 is a variable font with these axes:
  opsz
  wght

This means all the styles are contained in these files:
  SourceSerif4-VariableFont_opsz,wght.ttf
  SourceSerif4-Italic-VariableFont_opsz,wght.ttf

If your app fully supports variable fonts, you can now pick intermediate styles
that aren’t available as static fonts. Not all apps support variable fonts, and
in those cases you can use the static font files for Source Serif 4:
  static/SourceSerif4/SourceSerif4-ExtraLight.ttf
  static/SourceSerif4/SourceSerif4-Light.ttf
  static/SourceSerif4/SourceSerif4-Regular.ttf
  static/SourceSerif4/SourceSerif4-Medium.ttf
  static/SourceSerif4/SourceSerif4-SemiBold.ttf
  static/SourceSerif4/SourceSerif4-Bold.ttf
  static/SourceSerif4/SourceSerif4-ExtraBold.ttf
  static/SourceSerif4/SourceSerif4-Black.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-ExtraLight.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-Light.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-Regular.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-Medium.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-SemiBold.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-Bold.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-ExtraBold.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-Black.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-ExtraLight.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-Light.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-Regular.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-Medium.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-SemiBold.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-Bold.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-ExtraBold.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-Black.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-ExtraLight.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-Light.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-Regular.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-Medium.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-SemiBold.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-Bold.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-ExtraBold.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-Black.ttf
  static/SourceSerif4/SourceSerif4-ExtraLightItalic.ttf
  static/SourceSerif4/SourceSerif4-LightItalic.ttf
  static/SourceSerif4/SourceSerif4-Italic.ttf
  static/SourceSerif4/SourceSerif4-MediumItalic.ttf
  static/SourceSerif4/SourceSerif4-SemiBoldItalic.ttf
  static/SourceSerif4/SourceSerif4-BoldItalic.ttf
  static/SourceSerif4/SourceSerif4-ExtraBoldItalic.ttf
  static/SourceSerif4/SourceSerif4-BlackItalic.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-ExtraLightItalic.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-LightItalic.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-Italic.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-MediumItalic.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-SemiBoldItalic.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-BoldItalic.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-ExtraBoldItalic.ttf
  static/SourceSerif4_18pt/SourceSerif4_18pt-BlackItalic.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-ExtraLightItalic.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-LightItalic.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-Italic.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-MediumItalic.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-SemiBoldItalic.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-BoldItalic.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-ExtraBoldItalic.ttf
  static/SourceSerif4_36pt/SourceSerif4_36pt-BlackItalic.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-ExtraLightItalic.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-LightItalic.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-Italic.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-MediumItalic.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-SemiBoldItalic.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-BoldItalic.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-ExtraBoldItalic.ttf
  static/SourceSerif4_48pt/SourceSerif4_48pt-BlackItalic.ttf

Get started
-----------

1. Install the font files you want to use

2. Use your app's font picker to view the font family and all the
available styles

Learn more about variable fonts
-------------------------------

  https://developers.google.com/web/fundamentals/design-and-ux/typography/variable-fonts
  https://variablefonts.typenetwork.com
  https://medium.com/variable-fonts

In desktop apps

  https://theblog.adobe.com/can-variable-fonts-illustrator-cc
  https://helpx.adobe.com/nz/photoshop/using/fonts.html#variable_fonts

Online

  https://developers.google.com/fonts/docs/getting_started
  https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Fonts/Variable_Fonts_Guide
  https://developer.microsoft.com/en-us/microsoft-edge/testdrive/demos/variable-fonts

Installing fonts

  MacOS: https://support.apple.com/en-us/HT201749
  Linux: https://www.google.com/search?q=how+to+install+a+font+on+gnu%2Blinux
  Windows: https://support.microsoft.com/en-us/help/314960/how-to-install-or-remove-a-font-in-windows

Android Apps

  https://developers.google.com/fonts/docs/android
  https://developer.android.com/guide/topics/ui/look-and-feel/downloadable-fonts

License
-------
Please read the full license text (OFL.txt) to understand the permissions,
restrictions and requirements for usage, redistribution, and modification.

You can use them in your products & projects – print or digital,
commercial or otherwise.

This isn't legal advice; please consider consulting a lawyer, and see the full
license for all details.


================================================
FILE: _static/fontsizes.html
================================================
<!DOCTYPE html>
<!--
  Manual test page for comparing the relative sizes of the three "Source"
  families (Source Serif 4, Source Sans 3, Source Code Pro) with and without
  font-size-adjust.

  The DOCTYPE above keeps the page in standards mode, so the size comparisons
  are not influenced by quirks-mode layout rules.
-->
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Testing relative font sizes</title>

<style>
  /*
   * Variable-font faces, loaded from ./fonts/ relative to this file.
   * Each family declares one italic and one upright face; "font-weight: 1 999"
   * is the weight range each face is allowed to serve.
   */
  @font-face {
    font-family: 'Source Serif 4';
    src: url('./fonts/Source_Serif_4/SourceSerif4-Italic-VariableFont_opsz,wght.ttf') format('truetype-variations');
    font-style: italic;
    font-weight: 1 999;
  }

  @font-face {
    font-family: 'Source Serif 4';
    src: url('./fonts/Source_Serif_4/SourceSerif4-VariableFont_opsz,wght.ttf') format('truetype-variations');
    font-style: normal;
    font-weight: 1 999;
  }

  @font-face {
    font-family: 'Source Sans 3';
    src: url('./fonts/Source_Sans_3/SourceSans3-Italic-VariableFont_wght.ttf') format('truetype-variations');
    font-style: italic;
    font-weight: 1 999;
  }

  @font-face {
    font-family: 'Source Sans 3';
    src: url('./fonts/Source_Sans_3/SourceSans3-VariableFont_wght.ttf') format('truetype-variations');
    font-style: normal;
    font-weight: 1 999;
  }

  @font-face {
    font-family: 'Source Code Pro';
    src: url('./fonts/Source_Code_Pro/SourceCodePro-Italic-VariableFont_wght.ttf') format('truetype-variations');
    font-style: italic;
    font-weight: 1 999;
  }

  @font-face {
    font-family: 'Source Code Pro';
    src: url('./fonts/Source_Code_Pro/SourceCodePro-VariableFont_wght.ttf') format('truetype-variations');
    font-style: normal;
    font-weight: 1 999;
  }
</style>

<style>
  /*
   * Per-family tweaks used by the samples that carry an adj-* class.
   * The serif rules leave font-size-adjust at "none"; the sans and mono rules
   * set an explicit value. Upright and italic variants have separate rules so
   * each can be tuned independently (they currently hold the same values).
   */
  span.adj-serif {
      font-size-adjust: none;
      font-weight: 400;
  }
  span.adj-sans {
      font-size-adjust: 0.47;
      font-weight: 400;
  }
  span.adj-mono {
      font-size-adjust: 0.46;
      font-weight: 400;
  }
  span.adj-serif-i {
      font-size-adjust: none;
      font-weight: 400;
  }
  span.adj-sans-i {
      font-size-adjust: 0.47;
      font-weight: 400;
  }
  span.adj-mono-i {
      font-size-adjust: 0.46;
      font-weight: 400;
  }
</style>


  </head>
  <body>

    <h1>Samples for comparing font-size-adjust and variable-axis
    tweaks on the Source Superfamily</h1>
    
    <h2>Unadjusted</h2>
    <!--
      Baseline rows: every sample span carries only inline font-family and
      font-size (plus font-style for the italic half); no adj-* class is used.
      In each row the label paragraph is closed explicitly before the sample
      paragraph starts, because <p> elements cannot nest.
      The sample spans stay on a single line with no whitespace between them
      so that adjacent samples touch.
    -->
    <h3>72 pt</h3>
    <p>
      Source Serif 4 | Source Sans 3 | Source Serif 4 | Source Code Pro | Source Serif 4 | ...italics
    </p>
    <p>
      <span style="font-family:'Source Serif 4'; font-size:72pt">Hn</span><span style="font-family:'Source Sans 3'; font-size:72pt">Hn</span><span style="font-family:'Source Serif 4'; font-size:72pt">Hn</span><span style="font-family:'Source Code Pro'; font-size:72pt">Hn</span><span style="font-family:'Source Serif 4'; font-size:72pt">Hn</span><span style="font-family:'Source Serif 4'; font-size:72pt; font-style: italic">Hn</span><span style="font-family:'Source Sans 3'; font-size:72pt; font-style: italic">Hn</span><span style="font-family:'Source Serif 4'; font-size:72pt; font-style: italic">Hn</span><span style="font-family:'Source Code Pro'; font-size:72pt; font-style: italic">Hn</span>
    </p>

    <h3>28 pt</h3>
    <p>
      Source Serif 4 | Source Sans 3 | Source Serif 4 | Source Code Pro | Source Serif 4 | ...italics
    </p>
    <p>
      <span style="font-family:'Source Serif 4'; font-size:28pt">Hn</span><span style="font-family:'Source Sans 3'; font-size:28pt">Hn</span><span style="font-family:'Source Serif 4'; font-size:28pt">Hn</span><span style="font-family:'Source Code Pro'; font-size:28pt">Hn</span><span style="font-family:'Source Serif 4'; font-size:28pt">Hn</span><span style="font-family:'Source Serif 4'; font-size:28pt; font-style: italic">Hn</span><span style="font-family:'Source Sans 3'; font-size:28pt; font-style: italic">Hn</span><span style="font-family:'Source Serif 4'; font-size:28pt; font-style: italic">Hn</span><span style="font-family:'Source Code Pro'; font-size:28pt; font-style: italic">Hn</span>
    </p>

    <h3>16 pt</h3>
    <p>
      Source Serif 4 | Source Sans 3 | Source Serif 4 | Source Code Pro | Source Serif 4 | ...italics
    </p>
    <p>
      <span style="font-family:'Source Serif 4'; font-size:16pt">Hn</span><span style="font-family:'Source Sans 3'; font-size:16pt">Hn</span><span style="font-family:'Source Serif 4'; font-size:16pt">Hn</span><span style="font-family:'Source Code Pro'; font-size:16pt">Hn</span><span style="font-family:'Source Serif 4'; font-size:16pt">Hn</span><span style="font-family:'Source Serif 4'; font-size:16pt; font-style: italic">Hn</span><span style="font-family:'Source Sans 3'; font-size:16pt; font-style: italic">Hn</span><span style="font-family:'Source Serif 4'; font-size:16pt; font-style: italic">Hn</span><span style="font-family:'Source Code Pro'; font-size:16pt; font-style: italic">Hn</span>
    </p>

    <h3>10 pt</h3>
    <p>
      Source Serif 4 | Source Sans 3 | Source Serif 4 | Source Code Pro | Source Serif 4 | ...italics
    </p>
    <p>
      <span style="font-family:'Source Serif 4'; font-size:10pt">Hn</span><span style="font-family:'Source Sans 3'; font-size:10pt">Hn</span><span style="font-family:'Source Serif 4'; font-size:10pt">Hn</span><span style="font-family:'Source Code Pro'; font-size:10pt">Hn</span><span style="font-family:'Source Serif 4'; font-size:10pt">Hn</span><span style="font-family:'Source Serif 4'; font-size:10pt; font-style: italic">Hn</span><span style="font-family:'Source Sans 3'; font-size:10pt; font-style: italic">Hn</span><span style="font-family:'Source Serif 4'; font-size:10pt; font-style: italic">Hn</span><span style="font-family:'Source Code Pro'; font-size:10pt; font-style: italic">Hn</span>
    </p>


      <hr>
      
    <h2>Adjusted</h2>
    <!--
      Adjusted rows: the first span in each row is the control (Source Serif 4
      with no class), followed by a single space and then the same sequence of
      samples as the unadjusted rows, each carrying its adj-* class.
      In each row the label paragraph is closed explicitly before the sample
      paragraph starts, because <p> elements cannot nest.
      Apart from the one space after the control, the spans stay on a single
      line with no whitespace between them so that adjacent samples touch.
    -->
    <h3>72 pt</h3>
    <p>
      control + Source Serif 4 | Source Sans 3 | Source Serif 4 | Source Code Pro | Source Serif 4 | ...italics
    </p>
    <p>
      <span style="font-family:'Source Serif 4'; font-size:72pt">Hn</span> <span class="adj-serif" style="font-family:'Source Serif 4'; font-size:72pt">Hn</span><span class="adj-sans" style="font-family:'Source Sans 3'; font-size:72pt">Hn</span><span class="adj-serif" style="font-family:'Source Serif 4'; font-size:72pt">Hn</span><span class="adj-mono" style="font-family:'Source Code Pro'; font-size:72pt">Hn</span><span class="adj-serif" style="font-family:'Source Serif 4'; font-size:72pt">Hn</span><span class="adj-serif-i" style="font-family:'Source Serif 4'; font-size:72pt; font-style: italic">Hn</span><span class="adj-sans-i" style="font-family:'Source Sans 3'; font-size:72pt; font-style: italic">Hn</span><span class="adj-serif-i" style="font-family:'Source Serif 4'; font-size:72pt; font-style: italic">Hn</span><span class="adj-mono-i" style="font-family:'Source Code Pro'; font-size:72pt; font-style: italic">Hn</span>
    </p>

    <h3>28 pt</h3>
    <p>
      control + Source Serif 4 | Source Sans 3 | Source Serif 4 | Source Code Pro | Source Serif 4 | ...italics
    </p>
    <p>
      <span style="font-family:'Source Serif 4'; font-size:28pt">Hn</span> <span class="adj-serif" style="font-family:'Source Serif 4'; font-size:28pt">Hn</span><span class="adj-sans" style="font-family:'Source Sans 3'; font-size:28pt">Hn</span><span class="adj-serif" style="font-family:'Source Serif 4'; font-size:28pt">Hn</span><span class="adj-mono" style="font-family:'Source Code Pro'; font-size:28pt">Hn</span><span class="adj-serif" style="font-family:'Source Serif 4'; font-size:28pt">Hn</span><span class="adj-serif-i" style="font-family:'Source Serif 4'; font-size:28pt; font-style: italic">Hn</span><span class="adj-sans-i" style="font-family:'Source Sans 3'; font-size:28pt; font-style: italic">Hn</span><span class="adj-serif-i" style="font-family:'Source Serif 4'; font-size:28pt; font-style: italic">Hn</span><span class="adj-mono-i" style="font-family:'Source Code Pro'; font-size:28pt; font-style: italic">Hn</span>
    </p>

    <h3>16 pt</h3>
    <p>
      control + Source Serif 4 | Source Sans 3 | Source Serif 4 | Source Code Pro | Source Serif 4 | ...italics
    </p>
    <p>
      <span style="font-family:'Source Serif 4'; font-size:16pt">Hn</span> <span class="adj-serif" style="font-family:'Source Serif 4'; font-size:16pt">Hn</span><span class="adj-sans" style="font-family:'Source Sans 3'; font-size:16pt">Hn</span><span class="adj-serif" style="font-family:'Source Serif 4'; font-size:16pt">Hn</span><span class="adj-mono" style="font-family:'Source Code Pro'; font-size:16pt">Hn</span><span class="adj-serif" style="font-family:'Source Serif 4'; font-size:16pt">Hn</span><span class="adj-serif-i" style="font-family:'Source Serif 4'; font-size:16pt; font-style: italic">Hn</span><span class="adj-sans-i" style="font-family:'Source Sans 3'; font-size:16pt; font-style: italic">Hn</span><span class="adj-serif-i" style="font-family:'Source Serif 4'; font-size:16pt; font-style: italic">Hn</span><span class="adj-mono-i" style="font-family:'Source Code Pro'; font-size:16pt; font-style: italic">Hn</span>
    </p>

    <h3>10 pt</h3>
    <p>
      control + Source Serif 4 | Source Sans 3 | Source Serif 4 | Source Code Pro | Source Serif 4 | ...italics
    </p>
    <p>
      <span style="font-family:'Source Serif 4'; font-size:10pt">Hn</span> <span class="adj-serif" style="font-family:'Source Serif 4'; font-size:10pt">Hn</span><span class="adj-sans" style="font-family:'Source Sans 3'; font-size:10pt">Hn</span><span class="adj-serif" style="font-family:'Source Serif 4'; font-size:10pt">Hn</span><span class="adj-mono" style="font-family:'Source Code Pro'; font-size:10pt">Hn</span><span class="adj-serif" style="font-family:'Source Serif 4'; font-size:10pt">Hn</span><span class="adj-serif-i" style="font-family:'Source Serif 4'; font-size:10pt; font-style: italic">Hn</span><span class="adj-sans-i" style="font-family:'Source Sans 3'; font-size:10pt; font-style: italic">Hn</span><span class="adj-serif-i" style="font-family:'Source Serif 4'; font-size:10pt; font-style: italic">Hn</span><span class="adj-mono-i" style="font-family:'Source Code Pro'; font-size:10pt; font-style: italic">Hn</span>
    </p>

      
  </body>
</html>


================================================
FILE: _static/toggleSvgColors.js
================================================
/**
 * Toggle a shaping-demo image between its greyscale and color renderings.
 *
 * Looks up the element with the given ID and, when it carries the
 * "shaping-demo" class, swaps "greyscale-svg" for "color-svg" or vice
 * versa. A shaping-demo element that has neither class is left untouched.
 * Elements without the "shaping-demo" class, and IDs that match no
 * element, only produce a console message.
 *
 * @param {string} elementId - ID of the element whose color mode is toggled.
 */
function toggleColor(elementId) {
    // Keep the reference local so that no implicit global is created
    // (an undeclared assignment also throws in strict mode / ES modules).
    const demoImage = document.getElementById(elementId);
    console.log(elementId);

    // getElementById yields null for an unknown ID; report it and stop
    // rather than failing with a TypeError on the classList access below.
    if (!demoImage) {
        console.log("toggleColor called with an id that matches no element");
        return;
    }

    if (!demoImage.classList.contains("shaping-demo")) {
        console.log("toggleColor called on element that is not .shaping-demo class");
        return;
    }

    if (demoImage.classList.contains("greyscale-svg")) {
        demoImage.classList.add("color-svg");
        demoImage.classList.remove("greyscale-svg");
    } else if (demoImage.classList.contains("color-svg")) {
        demoImage.classList.add("greyscale-svg");
        demoImage.classList.remove("color-svg");
    }
}


================================================
FILE: _templates/layout.html
================================================
{%- extends "!layout.html" %}
{#-
  Project-level override of the theme's layout template. Per the Sphinx
  templating docs, the "!" prefix makes Sphinx load layout.html from the
  underlying HTML theme rather than from this templates directory.

  The extrahead block below adds nothing of its own: it only re-emits the
  parent template's block content through super(). It is the place to add
  extra <head> markup if that is ever needed.
-#}
{% block extrahead %}
  {{ super() }}
{% endblock %}


================================================
FILE: _templates/static_nav.html
================================================
<h3>Contents</h3>
<ul class="current">
  <li class="toctree-l1 static-nav">
    <a class="reference internal" href="/index.html">Overview</a>
  </li>
  <li class="toctree-l1 static-nav static-nav-heading">
    Script shaping
    <ul class="current">
      <li class="toctree-l2 static-nav static-nav-heading">
	Indic Model
	<ul class="current">
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-indic-general.html">Indic general</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-devanagari.html">Devanagari</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-bengali.html">Bengali</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-gujarati.html">Gujarati</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-gurmukhi.html">Gurmukhi</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-kannada.html">Kannada</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-malayalam.html">Malayalam</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-oriya.html">Oriya</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-sinhala.html">Sinhala</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-tamil.html">Tamil</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-telugu.html">Telugu</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-vedic-extensions.html">Vedic Extensions</a>
	  </li>
	</ul>
      </li>
      <li class="toctree-l2 static-nav static-nav-heading">
	Arabic Model
	<ul class="current">
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-arabic-general.html">Arabic general</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-arabic.html">Arabic</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-nko.html">N'Ko</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-syriac.html">Syriac</a>
	  </li>
	  <li class="toctree-l3 static-nav">
	    <a class="reference internal" href="/opentype-shaping-mongolian.html">Mongolian</a>
	  </li>
	</ul>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/opentype-shaping-hangul.html">Hangul</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/opentype-shaping-hebrew.html">Hebrew</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/opentype-shaping-khmer.html">Khmer</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/opentype-shaping-myanmar.html">Myanmar</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/opentype-shaping-thai-lao.html">Thai and Lao</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/opentype-shaping-tibetan.html">Tibetan</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/opentype-shaping-use.html">Universal Shaping Engine</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/opentype-shaping-default.html">Default scripts</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/opentype-shaping-emoji.html">Emoji</a>
      </li>
    </ul>
  </li>
  <li class="toctree-l1 static-nav">
    <a class="reference internal" href="/opentype-shaping-normalization.html">Normalization</a>
  </li>
  <li class="toctree-l1 static-nav">
    <a class="reference internal" href="/notes/README.html">Notes</a>
    <ul>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/notes/uniscribe-bug-compatibility.html">Uniscribe compatibility</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/notes/ragel-machine-notation.html">Ragel state-machine operators</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/notes/emoji-implementation.html">Emoji implementation</a>
      </li>
    </ul>
  </li>
  <li class="toctree-l1 static-nav">
    <a class="reference internal" href="/character-tables/index.html">Character Tables</a>
    <ul>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-arabic.html">Arabic</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-bengali.html">Bengali</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-devanagari.html">Devanagari</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-gujarati.html">Gujarati</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-gurmukhi.html">Gurmukhi</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-hangul.html">Hangul</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-hebrew.html">Hebrew</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-kannada.html">Kannada</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-khmer.html">Khmer</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-lao.html">Lao</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-malayalam.html">Malayalam</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-mongolian.html">Mongolian</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-myanmar.html">Myanmar</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-nko.html">N'Ko</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-oriya.html">Oriya</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-sinhala.html">Sinhala</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-syriac.html">Syriac</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-tamil.html">Tamil</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-telugu.html">Telugu</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-thai.html">Thai</a>
      </li>
      <li class="toctree-l2 static-nav">
	<a class="reference internal" href="/character-tables/character-tables-tibetan.html">Tibetan</a>
      </li>
    </ul>    
  </li>
  <li class="toctree-l1 static-nav">
    <a class="reference internal" href="/errata.html">Errata</a>
  </li>
</ul>

<hr>

<ul class="extra-links">
  <li class="toctree-l1 static-nav">
    <a class="reference external" href="https://github.com/n8willis/opentype-shaping-documents/issues">GitHub issues</a>
  </li>
  <li class="toctree-l1 static-nav">
    <a class="reference external" href="https://github.com/n8willis/opentype-shaping-documents/blob/master/BUILD.md">Build process</a>
  </li>
</ul>


================================================
FILE: _toc.yml
================================================
root: index
options:
  maxdepth: 0
  numbered: False
  hidden: True
  titlesonly: True
entries:
  - file: overview
    title: Overview
    subtrees:
    - maxdepth: 0
      numbered: 3
      entries:
      - file: opentype-shaping-indic-general
        title: Indic general
      - file: opentype-shaping-devanagari
        title: Devanagari
      - file: opentype-shaping-bengali
        title: Bengali
      - file: opentype-shaping-gujarati
        title: Gujarati
      - file: opentype-shaping-gurmukhi
        title: Gurmukhi
      - file: opentype-shaping-kannada
        title: Kannada
      - file: opentype-shaping-malayalam
        title: Malayalam
      - file: opentype-shaping-oriya
        title: Oriya
      - file: opentype-shaping-sinhala
        title: Sinhala
      - file: opentype-shaping-tamil
        title: Tamil
      - file: opentype-shaping-telugu
        title: Telugu
      - file: opentype-shaping-vedic-extensions
        title: Vedic extensions
      - file: opentype-shaping-arabic-general
        title: Arabic general
      - file: opentype-shaping-arabic
        title: Arabic
      - file: opentype-shaping-nko
        title: N'Ko
      - file: opentype-shaping-syriac
        title: Syriac
      - file: opentype-shaping-mongolian
        title: Mongolian
      - file: opentype-shaping-hangul
        title: Hangul
      - file: opentype-shaping-hebrew
        title: Hebrew
      - file: opentype-shaping-khmer
        title: Khmer
      - file: opentype-shaping-myanmar
        title: Myanmar
      - file: opentype-shaping-thai-lao
        title: Thai and Lao
      - file: opentype-shaping-tibetan
        title: Tibetan
      - file: opentype-shaping-use
        title: Universal Shaping Engine
      - file: opentype-shaping-default
        title: Default scripts
      - file: opentype-shaping-emoji
        title: Emoji
      - file: opentype-shaping-normalization
        title: Normalization
  - file: notes/index
    title: Notes
    subtrees:
    - maxdepth: 0
      numbered: False
      entries:
      - file: notes/uniscribe-bug-compatibility
        title: Uniscribe compatibility
      - file: notes/ragel-machine-notation
        title: Ragel state-machine operators
      - file: notes/emoji-implementation
        title: Emoji implementation
      - file: character-tables/index
        title: Character tables
      - file: character-tables/character-tables-arabic
        title: Arabic
      - file: character-tables/character-tables-bengali
        title: Bengali
      - file: character-tables/character-tables-devanagari
        title: Devanagari
      - file: character-tables/character-tables-gujarati
        title: Gujarati
      - file: character-tables/character-tables-gurmukhi
        title: Gurmukhi
      - file: character-tables/character-tables-hangul
        title: Hangul
      - file: character-tables/character-tables-hebrew
        title: Hebrew
      - file: character-tables/character-tables-kannada
        title: Kannada
      - file: character-tables/character-tables-khmer
        title: Khmer
      - file: character-tables/character-tables-lao
        title: Lao
      - file: character-tables/character-tables-malayalam
        title: Malayalam
      - file: character-tables/character-tables-mongolian
        title: Mongolian
      - file: character-tables/character-tables-myanmar
        title: Myanmar
      - file: character-tables/character-tables-nko
        title: N'Ko
      - file: character-tables/character-tables-oriya
        title: Oriya
      - file: character-tables/character-tables-sinhala
        title: Sinhala
      - file: character-tables/character-tables-syriac
        title: Syriac
      - file: character-tables/character-tables-tamil
        title: Tamil
      - file: character-tables/character-tables-telugu
        title: Telugu
      - file: character-tables/character-tables-thai
        title: Thai
      - file: character-tables/character-tables-tibetan
        title: Tibetan
      - file: errata
        title: Errata
  

================================================
FILE: build-requirements.txt
================================================
alabaster==1.0.0
importlib-metadata>=5.0.0
myst-parser>=0.19.1
docutils==0.21.2
markdown-it-py==3.0.0
pip>=22.1.2
pyparsing>=3.0.9
pyspelling>=2.12.1
pytz>=2022.4
setuptools>=62.6.0
Sphinx==8.1.3
sphinx_external_toc>=1.1.0
sphinx-inline-svg>=0.2.0
sphinx-multitoc-numbering==0.1.3
svg-stack>=0.1.0
cloud-sptheme>=1.10.0


================================================
FILE: character-tables/README.md
================================================
# Character tables #

The files in this directory include per-script reference tables
showing the shaping-related properties of the codepoints used for each
script.


  - Indic
      - [Devanagari](character-tables-devanagari.md)
      - [Bengali](character-tables-bengali.md)
      - [Gujarati](character-tables-gujarati.md)
      - [Gurmukhi](character-tables-gurmukhi.md)
      - [Kannada](character-tables-kannada.md)
      - [Malayalam](character-tables-malayalam.md)
      - [Oriya](character-tables-oriya.md)
      - [Tamil](character-tables-tamil.md)
      - [Telugu](character-tables-telugu.md)
      - [Sinhala](character-tables-sinhala.md)
	  - _Vedic Extensions tables are included in each Indic script_
  - Arabic
      - [Arabic](character-tables-arabic.md)
      - [Syriac](character-tables-syriac.md)
      - [N'Ko](character-tables-nko.md)
      - [Mongolian](character-tables-mongolian.md)
  - Hangul
      - [Hangul Jamo](character-tables-hangul.md)
  - Hebrew
      - [Hebrew](character-tables-hebrew.md)
  - Khmer
      - [Khmer](character-tables-khmer.md)
  - Lao
      - [Lao](character-tables-lao.md)
  - Myanmar
      - [Myanmar](character-tables-myanmar.md)
  - Thai
      - [Thai](character-tables-thai.md)
  - Tibetan
      - [Tibetan](character-tables-tibetan.md)


Tables are not provided for the default or Universal Shaping Engine
(<abbr>USE</abbr>) shaping documents, each of which covers a
multitude of individual scripts, nor for the emoji shaping document,
because emoji usage is not specific to any individual script.


================================================
FILE: character-tables/character-tables-arabic.md
================================================
# Arabic character tables #

This document lists the per-character shaping information needed to
[shape Arabic text](../opentype-shaping-arabic.md).

**Contents**

  - [Arabic character table](#arabic-character-table)
  - [Arabic Supplement character table](#arabic-supplement-character-table)
  - [Arabic Extended-A character table](#arabic-extended-a-character-table)
  - [Arabic Extended-B character table](#arabic-extended-b-character-table)
  - [Arabic Extended-C character table](#arabic-extended-c-character-table)
  - [Rumi Numeral Symbols character table](#rumi-numeral-symbols-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)


## Arabic character table ##

Arabic glyphs should be classified as in the following
table. Codepoints in the Arabic block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column.

The _Joining type_ column indicates whether each codepoint is defined
as joining with adjacent characters on the left side, right side, left
and right sides ("DUAL"), or neither side ("NON_JOINING"). Codepoints
designated TRANSPARENT in the _Joining type_ column do not join with
adjacent characters and, in addition, do not affect the joining
behavior of surrounding characters. Non-spacing marks are of type
TRANSPARENT. Codepoints designated JOIN_CAUSING force adjacent
characters to join.

The _Joining group_ column lists the fundamental letter that the
listed codepoint behaves like for joining purposes.

Assigned codepoints with a _null_ in the _Joining group_
column evoke no special behavior from the shaping engine during the
join-computation stage.

The _Mark class_ column indicates the Canonical Combining Class
for the codepoint.  Marks are assigned non-zero combining classes so
that sequences of adjacent marks can be reordered as required by the
orthography. 

For Arabic, a subset of marks in the 220 and 230 classes are also
designated _Modifier Combining Marks_ (<abbr>MCM</abbr>). These are denoted with
_220_MCM_ and _230_MCM_ in the _Mark class_ column. The <abbr title="Modifier Combining Mark">MCM</abbr> marks are
treated differently during the mark-reordering stage.


:::{table} Arabic block table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                                         |
|:----------|:-----------------|:-------------|:---------------------|:-----------|-----------------------------------------------|
|`U+0600`   | Other            | NON_JOINING  | _null_               | _0_        | &#x0600; Number Sign                          |
|`U+0601`   | Other            | NON_JOINING  | _null_               | _0_        | &#x0601; Sign Sanah                           |
|`U+0602`   | Other            | NON_JOINING  | _null_               | _0_        | &#x0602; Footnote Marker                      |
|`U+0603`   | Other            | NON_JOINING  | _null_               | _0_        | &#x0603; Sign Safha                           |
|`U+0604`   | Other            | NON_JOINING  | _null_               | _0_        | &#x0604; Sign Samvat                          |
|`U+0605`   | Other            | NON_JOINING  | _null_               | _0_        | &#x0605; Number Mark Above                    |
|`U+0606`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x0606; Cube Root                            |
|`U+0607`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x0607; Fourth Root                          |
|`U+0608`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x0608; Ray                                  |
|`U+0609`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x0609; Per Mille                            |
|`U+060A`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x060A; Per Ten Thousand                     |
|`U+060B`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x060B; Afghani Sign                         |
|`U+060C`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x060C; Comma                                |
|`U+060D`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x060D; Date Separator                       |
|`U+060E`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x060E; Poetic Verse Sign                    |
|`U+060F`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x060F; Sign Misra                           |
| | | | | |                                                                                                                      
|`U+0610`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0610; Sign Sallallahou Alayhe Wassallam    |
|`U+0611`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0611; Sign Alayhe Assallam                 |
|`U+0612`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0612; Sign Rahmatullah Alayhe              |
|`U+0613`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0613; Sign Radi Allahou Anhu               |
|`U+0614`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0614; Sign Takhallus                       |
|`U+0615`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0615; Small High Tah                       |
|`U+0616`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0616; Small High Alef Lam Yeh              |
|`U+0617`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0617; Small High Zain                      |
|`U+0618`   | Mark [Mn]        | TRANSPARENT  | _null_               | 30         | &#x0618; Small Fatha                          |
|`U+0619`   | Mark [Mn]        | TRANSPARENT  | _null_               | 31         | &#x0619; Small Damma                          |
|`U+061A`   | Mark [Mn]        | TRANSPARENT  | _null_               | 32         | &#x061A; Small Kasra                          |
|`U+061B`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x061B; Semicolon                            |
|`U+061C`   | Other            | TRANSPARENT  | _null_               | _0_        | &#x061C; Arabic Letter Mark                   |
|`U+061D`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x061D; End Of Text Mark                     |
|`U+061E`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x061E; Triple Dot Punctuation Mark          |
|`U+061F`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x061F; Question Mark                        |
| | | | | |                                                                                                                       
|`U+0620`   | Letter           | DUAL         | YEH                  | _0_        | &#x0620; Kashmiri Yeh                         |
|`U+0621`   | Letter           | NON_JOINING  | _null_               | _0_        | &#x0621; Hamza                                |
|`U+0622`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0622; Alef With Madda Above                |
|`U+0623`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0623; Alef With Hamza Above                |
|`U+0624`   | Letter           | RIGHT        | WAW                  | _0_        | &#x0624; Waw With Hamza Above                 |
|`U+0625`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0625; Alef With Hamza Below                |
|`U+0626`   | Letter           | DUAL         | YEH                  | _0_        | &#x0626; Dotless Yeh With Hamza Above         |
|`U+0627`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0627; Alef                                 |
|`U+0628`   | Letter           | DUAL         | BEH                  | _0_        | &#x0628; Beh                                  |
|`U+0629`   | Letter           | RIGHT        | TEH_MARBUTA          | _0_        | &#x0629; Teh Marbuta                          |
|`U+062A`   | Letter           | DUAL         | BEH                  | _0_        | &#x062A; Dotless Beh With 2 Dots Above        |
|`U+062B`   | Letter           | DUAL         | BEH                  | _0_        | &#x062B; Dotless Beh With 3 Dots Above        |
|`U+062C`   | Letter           | DUAL         | HAH                  | _0_        | &#x062C; Hah With Dot Below                   |
|`U+062D`   | Letter           | DUAL         | HAH                  | _0_        | &#x062D; Hah                                  |
|`U+062E`   | Letter           | DUAL         | HAH                  | _0_        | &#x062E; Hah With Dot Above                   |
|`U+062F`   | Letter           | RIGHT        | DAL                  | _0_        | &#x062F; Dal                                  |
| | | | | |                                                                                                                       
|`U+0630`   | Letter           | RIGHT        | DAL                  | _0_        | &#x0630; Dal With Dot Above                   |
|`U+0631`   | Letter           | RIGHT        | REH                  | _0_        | &#x0631; Reh                                  |
|`U+0632`   | Letter           | RIGHT        | REH                  | _0_        | &#x0632; Reh With Dot Above                   |
|`U+0633`   | Letter           | DUAL         | SEEN                 | _0_        | &#x0633; Seen                                 |
|`U+0634`   | Letter           | DUAL         | SEEN                 | _0_        | &#x0634; Seen With 3 Dots Above               |
|`U+0635`   | Letter           | DUAL         | SAD                  | _0_        | &#x0635; Sad                                  |
|`U+0636`   | Letter           | DUAL         | SAD                  | _0_        | &#x0636; Sad With Dot Above                   |
|`U+0637`   | Letter           | DUAL         | TAH                  | _0_        | &#x0637; Tah                                  |
|`U+0638`   | Letter           | DUAL         | TAH                  | _0_        | &#x0638; Tah With Dot Above                   |
|`U+0639`   | Letter           | DUAL         | AIN                  | _0_        | &#x0639; Ain                                  |
|`U+063A`   | Letter           | DUAL         | AIN                  | _0_        | &#x063A; Ain With Dot Above                   |
|`U+063B`   | Letter           | DUAL         | GAF                  | _0_        | &#x063B; Keheh With 2 Dots Above              |
|`U+063C`   | Letter           | DUAL         | GAF                  | _0_        | &#x063C; Keheh With 3 Dots Below              |
|`U+063D`   | Letter           | DUAL         | FARSI_YEH            | _0_        | &#x063D; Farsi Yeh With Inverted V Above      |
|`U+063E`   | Letter           | DUAL         | FARSI_YEH            | _0_        | &#x063E; Farsi Yeh With 2 Dots Above          |
|`U+063F`   | Letter           | DUAL         | FARSI_YEH            | _0_        | &#x063F; Farsi Yeh With 3 Dots Above          |
| | | | | |                                                                                                                       
|`U+0640`   | Letter modifier  | JOIN_CAUSING | _null_               | _0_        | &#x0640; Tatweel                              |
|`U+0641`   | Letter           | DUAL         | FEH                  | _0_        | &#x0641; Feh                                  |
|`U+0642`   | Letter           | DUAL         | QAF                  | _0_        | &#x0642; Qaf                                  |
|`U+0643`   | Letter           | DUAL         | KAF                  | _0_        | &#x0643; Kaf                                  |
|`U+0644`   | Letter           | DUAL         | LAM                  | _0_        | &#x0644; Lam                                  |
|`U+0645`   | Letter           | DUAL         | MEEM                 | _0_        | &#x0645; Meem                                 |
|`U+0646`   | Letter           | DUAL         | NOON                 | _0_        | &#x0646; Noon                                 |
|`U+0647`   | Letter           | DUAL         | HEH                  | _0_        | &#x0647; Heh                                  |
|`U+0648`   | Letter           | RIGHT        | WAW                  | _0_        | &#x0648; Waw                                  |
|`U+0649`   | Letter           | DUAL         | YEH                  | _0_        | &#x0649; Dotless Yeh                          |
|`U+064A`   | Letter           | DUAL         | YEH                  | _0_        | &#x064A; Yeh                                  |
|`U+064B`   | Mark [Mn]        | TRANSPARENT  | _null_               | 27         | &#x064B; Fathatan                             |
|`U+064C`   | Mark [Mn]        | TRANSPARENT  | _null_               | 28         | &#x064C; Dammatan                             |
|`U+064D`   | Mark [Mn]        | TRANSPARENT  | _null_               | 29         | &#x064D; Kasratan                             |
|`U+064E`   | Mark [Mn]        | TRANSPARENT  | _null_               | 30         | &#x064E; Fatha                                |
|`U+064F`   | Mark [Mn]        | TRANSPARENT  | _null_               | 31         | &#x064F; Damma                                |
| | | | | |                                                                                                                      
|`U+0650`   | Mark [Mn]        | TRANSPARENT  | _null_               | 32         | &#x0650; Kasra                                |
|`U+0651`   | Mark [Mn]        | TRANSPARENT  | _null_               | 33         | &#x0651; Shadda                               |
|`U+0652`   | Mark [Mn]        | TRANSPARENT  | _null_               | 34         | &#x0652; Sukun                                |
|`U+0653`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0653; Maddah Above                         |
|`U+0654`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230_MCM    | &#x0654; Hamza Above                          |
|`U+0655`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220_MCM    | &#x0655; Hamza Below                          |
|`U+0656`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x0656; Subscript Alef                       |
|`U+0657`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0657; Inverted Damma                       |
|`U+0658`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230_MCM    | &#x0658; Noon Ghunna                          |
|`U+0659`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0659; Zwarakay                             |
|`U+065A`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x065A; Vowel Sign Small V Above             |
|`U+065B`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x065B; Vowel Sign Inverted Small V Above    |
|`U+065C`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x065C; Vowel Sign Dot Below                 |
|`U+065D`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x065D; Reversed Damma                       |
|`U+065E`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x065E; Fatha With Two Dots                  |
|`U+065F`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x065F; Wavy Hamza Below                     |
| | | | | |                                                                                                                      
|`U+0660`   | Number           | NON_JOINING  | _null_               | _0_        | &#x0660; Digit Zero                           |
|`U+0661`   | Number           | NON_JOINING  | _null_               | _0_        | &#x0661; Digit One                            |
|`U+0662`   | Number           | NON_JOINING  | _null_               | _0_        | &#x0662; Digit Two                            |
|`U+0663`   | Number           | NON_JOINING  | _null_               | _0_        | &#x0663; Digit Three                          |
|`U+0664`   | Number           | NON_JOINING  | _null_               | _0_        | &#x0664; Digit Four                           |
|`U+0665`   | Number           | NON_JOINING  | _null_               | _0_        | &#x0665; Digit Five                           |
|`U+0666`   | Number           | NON_JOINING  | _null_               | _0_        | &#x0666; Digit Six                            |
|`U+0667`   | Number           | NON_JOINING  | _null_               | _0_        | &#x0667; Digit Seven                          |
|`U+0668`   | Number           | NON_JOINING  | _null_               | _0_        | &#x0668; Digit Eight                          |
|`U+0669`   | Number           | NON_JOINING  | _null_               | _0_        | &#x0669; Digit Nine                           |
|`U+066A`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x066A; Percent Sign                         |
|`U+066B`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x066B; Decimal Separator                    |
|`U+066C`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x066C; Thousands Separator                  |
|`U+066D`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x066D; Five Pointed Star                    |
|`U+066E`   | Letter           | DUAL         | BEH                  | _0_        | &#x066E; Dotless Beh                          |
|`U+066F`   | Letter           | DUAL         | QAF                  | _0_        | &#x066F; Dotless Qaf                          |
| | | | | |                                                                                                                      
|`U+0670`   | Mark [Mn]        | TRANSPARENT  | _null_               | 35         | &#x0670; Superscript Alef                     |
|`U+0671`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0671; Alef With Wasla Above                |
|`U+0672`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0672; Alef With Wavy Hamza Above           |
|`U+0673`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0673; Alef With Wavy Hamza Below           |
|`U+0674`   | Letter           | NON_JOINING  | _null_               | _0_        | &#x0674; High Hamza                           |
|`U+0675`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0675; High Hamza Alef                      |
|`U+0676`   | Letter           | RIGHT        | WAW                  | _0_        | &#x0676; High Hamza Waw                       |
|`U+0677`   | Letter           | RIGHT        | WAW                  | _0_        | &#x0677; High Hamza Waw With Damma Above      |
|`U+0678`   | Letter           | DUAL         | YEH                  | _0_        | &#x0678; High Hamza Dotless Yeh               |
|`U+0679`   | Letter           | DUAL         | BEH                  | _0_        | &#x0679; Dotless Beh With Tah Above           |
|`U+067A`   | Letter           | DUAL         | BEH                  | _0_        | &#x067A; Dotless Beh With Vertical 2 Dots Above|
|`U+067B`   | Letter           | DUAL         | BEH                  | _0_        | &#x067B; Dotless Beh With Vertical 2 Dots Below|
|`U+067C`   | Letter           | DUAL         | BEH                  | _0_        | &#x067C; Dotless Beh With Attached Ring Below And 2 Dots Above|
|`U+067D`   | Letter           | DUAL         | BEH                  | _0_        | &#x067D; Dotless Beh With Inverted 3 Dots Above|
|`U+067E`   | Letter           | DUAL         | BEH                  | _0_        | &#x067E; Dotless Beh With 3 Dots Below        |
|`U+067F`   | Letter           | DUAL         | BEH                  | _0_        | &#x067F; Dotless Beh With 4 Dots Above        |
| | | | | |                                                                                                                      
|`U+0680`   | Letter           | DUAL         | BEH                  | _0_        | &#x0680; Dotless Beh With 4 Dots Below        |
|`U+0681`   | Letter           | DUAL         | HAH                  | _0_        | &#x0681; Hah With Hamza Above                 |
|`U+0682`   | Letter           | DUAL         | HAH                  | _0_        | &#x0682; Hah With Vertical 2 Dots Above       |
|`U+0683`   | Letter           | DUAL         | HAH                  | _0_        | &#x0683; Hah With 2 Dots Below                |
|`U+0684`   | Letter           | DUAL         | HAH                  | _0_        | &#x0684; Hah With Vertical 2 Dots Below       |
|`U+0685`   | Letter           | DUAL         | HAH                  | _0_        | &#x0685; Hah With 3 Dots Above                |
|`U+0686`   | Letter           | DUAL         | HAH                  | _0_        | &#x0686; Hah With 3 Dots Below                |
|`U+0687`   | Letter           | DUAL         | HAH                  | _0_        | &#x0687; Hah With 4 Dots Below                |
|`U+0688`   | Letter           | RIGHT        | DAL                  | _0_        | &#x0688; Dal With Tah Above                   |
|`U+0689`   | Letter           | RIGHT        | DAL                  | _0_        | &#x0689; Dal With Attached Ring Below         |
|`U+068A`   | Letter           | RIGHT        | DAL                  | _0_        | &#x068A; Dal With Dot Below                   |
|`U+068B`   | Letter           | RIGHT        | DAL                  | _0_        | &#x068B; Dal With Dot Below And Tah Above     |
|`U+068C`   | Letter           | RIGHT        | DAL                  | _0_        | &#x068C; Dal With 2 Dots Above                |
|`U+068D`   | Letter           | RIGHT        | DAL                  | _0_        | &#x068D; Dal With 2 Dots Below                |
|`U+068E`   | Letter           | RIGHT        | DAL                  | _0_        | &#x068E; Dal With 3 Dots Above                |
|`U+068F`   | Letter           | RIGHT        | DAL                  | _0_        | &#x068F; Dal With Inverted 3 Dots Above       |
| | | | | |                                                                                                                      
|`U+0690`   | Letter           | RIGHT        | DAL                  | _0_        | &#x0690; Dal With 4 Dots Above                |
|`U+0691`   | Letter           | RIGHT        | REH                  | _0_        | &#x0691; Reh With Tah Above                   |
|`U+0692`   | Letter           | RIGHT        | REH                  | _0_        | &#x0692; Reh With V Above                     |
|`U+0693`   | Letter           | RIGHT        | REH                  | _0_        | &#x0693; Reh With Attached Ring Below         |
|`U+0694`   | Letter           | RIGHT        | REH                  | _0_        | &#x0694; Reh With Dot Below                   |
|`U+0695`   | Letter           | RIGHT        | REH                  | _0_        | &#x0695; Reh With V Below                     |
|`U+0696`   | Letter           | RIGHT        | REH                  | _0_        | &#x0696; Reh With Dot Below And Dot Within    |
|`U+0697`   | Letter           | RIGHT        | REH                  | _0_        | &#x0697; Reh With 2 Dots Above                |
|`U+0698`   | Letter           | RIGHT        | REH                  | _0_        | &#x0698; Reh With 3 Dots Above                |
|`U+0699`   | Letter           | RIGHT        | REH                  | _0_        | &#x0699; Reh With 4 Dots Above                |
|`U+069A`   | Letter           | DUAL         | SEEN                 | _0_        | &#x069A; Seen With Dot Below And Dot Above    |
|`U+069B`   | Letter           | DUAL         | SEEN                 | _0_        | &#x069B; Seen With 3 Dots Below               |
|`U+069C`   | Letter           | DUAL         | SEEN                 | _0_        | &#x069C; Seen With 3 Dots Below And 3 Dots Above|
|`U+069D`   | Letter           | DUAL         | SAD                  | _0_        | &#x069D; Sad With 2 Dots Below                |
|`U+069E`   | Letter           | DUAL         | SAD                  | _0_        | &#x069E; Sad With 3 Dots Above                |
|`U+069F`   | Letter           | DUAL         | TAH                  | _0_        | &#x069F; Tah With 3 Dots Above                |
| | | | | |                                                                                                                      
|`U+06A0`   | Letter           | DUAL         | AIN                  | _0_        | &#x06A0; Ain With 3 Dots Above                |
|`U+06A1`   | Letter           | DUAL         | FEH                  | _0_        | &#x06A1; Dotless Feh                          |
|`U+06A2`   | Letter           | DUAL         | FEH                  | _0_        | &#x06A2; Dotless Feh With Dot Below           |
|`U+06A3`   | Letter           | DUAL         | FEH                  | _0_        | &#x06A3; Feh With Dot Below                   |
|`U+06A4`   | Letter           | DUAL         | FEH                  | _0_        | &#x06A4; Dotless Feh With 3 Dots Above        |
|`U+06A5`   | Letter           | DUAL         | FEH                  | _0_        | &#x06A5; Dotless Feh With 3 Dots Below        |
|`U+06A6`   | Letter           | DUAL         | FEH                  | _0_        | &#x06A6; Dotless Feh With 4 Dots Above        |
|`U+06A7`   | Letter           | DUAL         | QAF                  | _0_        | &#x06A7; Dotless Qaf With Dot Above           |
|`U+06A8`   | Letter           | DUAL         | QAF                  | _0_        | &#x06A8; Dotless Qaf With 3 Dots Above        |
|`U+06A9`   | Letter           | DUAL         | GAF                  | _0_        | &#x06A9; Keheh                                |
|`U+06AA`   | Letter           | DUAL         | SWASH_KAF            | _0_        | &#x06AA; Swash Kaf                            |
|`U+06AB`   | Letter           | DUAL         | GAF                  | _0_        | &#x06AB; Keheh With Attached Ring Below       |
|`U+06AC`   | Letter           | DUAL         | KAF                  | _0_        | &#x06AC; Kaf With Dot Above                   |
|`U+06AD`   | Letter           | DUAL         | KAF                  | _0_        | &#x06AD; Kaf With 3 Dots Above                |
|`U+06AE`   | Letter           | DUAL         | KAF                  | _0_        | &#x06AE; Kaf With 3 Dots Below                |
|`U+06AF`   | Letter           | DUAL         | GAF                  | _0_        | &#x06AF; Gaf                                  |
| | | | | |                                                                                                                     
|`U+06B0`   | Letter           | DUAL         | GAF                  | _0_        | &#x06B0; Gaf With Attached Ring Below         |
|`U+06B1`   | Letter           | DUAL         | GAF                  | _0_        | &#x06B1; Gaf With 2 Dots Above                |
|`U+06B2`   | Letter           | DUAL         | GAF                  | _0_        | &#x06B2; Gaf With 2 Dots Below                |
|`U+06B3`   | Letter           | DUAL         | GAF                  | _0_        | &#x06B3; Gaf With Vertical 2 Dots Below       |
|`U+06B4`   | Letter           | DUAL         | GAF                  | _0_        | &#x06B4; Gaf With 3 Dots Above                |
|`U+06B5`   | Letter           | DUAL         | LAM                  | _0_        | &#x06B5; Lam With V Above                     |
|`U+06B6`   | Letter           | DUAL         | LAM                  | _0_        | &#x06B6; Lam With Dot Above                   |
|`U+06B7`   | Letter           | DUAL         | LAM                  | _0_        | &#x06B7; Lam With 3 Dots Above                |
|`U+06B8`   | Letter           | DUAL         | LAM                  | _0_        | &#x06B8; Lam With 3 Dots Below                |
|`U+06B9`   | Letter           | DUAL         | NOON                 | _0_        | &#x06B9; Noon With Dot Below                  |
|`U+06BA`   | Letter           | DUAL         | NOON                 | _0_        | &#x06BA; Dotless Noon                         |
|`U+06BB`   | Letter           | DUAL         | NOON                 | _0_        | &#x06BB; Dotless Noon With Tah Above          |
|`U+06BC`   | Letter           | DUAL         | NOON                 | _0_        | &#x06BC; Noon With Attached Ring Below        |
|`U+06BD`   | Letter           | DUAL         | NYA                  | _0_        | &#x06BD; Nya                                  |
|`U+06BE`   | Letter           | DUAL         | KNOTTED_HEH          | _0_        | &#x06BE; Knotted Heh                          |
|`U+06BF`   | Letter           | DUAL         | HAH                  | _0_        | &#x06BF; Hah With 3 Dots Below And Dot Above  |
| | | | | |                                                                                                                      
|`U+06C0`   | Letter           | RIGHT        | TEH_MARBUTA          | _0_        | &#x06C0; Dotless Teh Marbuta With Hamza Above |
|`U+06C1`   | Letter           | DUAL         | HEH_GOAL             | _0_        | &#x06C1; Heh Goal                             |
|`U+06C2`   | Letter           | DUAL         | HEH_GOAL             | _0_        | &#x06C2; Heh Goal With Hamza Above            |
|`U+06C3`   | Letter           | RIGHT        | TEH_MARBUTA_GOAL     | _0_        | &#x06C3; Teh Marbuta Goal                     |
|`U+06C4`   | Letter           | RIGHT        | WAW                  | _0_        | &#x06C4; Waw With Attached Ring Within        |
|`U+06C5`   | Letter           | RIGHT        | WAW                  | _0_        | &#x06C5; Waw With Bar                         |
|`U+06C6`   | Letter           | RIGHT        | WAW                  | _0_        | &#x06C6; Waw With V Above                     |
|`U+06C7`   | Letter           | RIGHT        | WAW                  | _0_        | &#x06C7; Waw With Damma Above                 |
|`U+06C8`   | Letter           | RIGHT        | WAW                  | _0_        | &#x06C8; Waw With Alef Above                  |
|`U+06C9`   | Letter           | RIGHT        | WAW                  | _0_        | &#x06C9; Waw With Inverted V Above            |
|`U+06CA`   | Letter           | RIGHT        | WAW                  | _0_        | &#x06CA; Waw With 2 Dots Above                |
|`U+06CB`   | Letter           | RIGHT        | WAW                  | _0_        | &#x06CB; Waw With 3 Dots Above                |
|`U+06CC`   | Letter           | DUAL         | FARSI_YEH            | _0_        | &#x06CC; Farsi Yeh                            |
|`U+06CD`   | Letter           | RIGHT        | YEH_WITH_TAIL        | _0_        | &#x06CD; Yeh With Tail                        |
|`U+06CE`   | Letter           | DUAL         | FARSI_YEH            | _0_        | &#x06CE; Farsi Yeh With V Above               |
|`U+06CF`   | Letter           | RIGHT        | WAW                  | _0_        | &#x06CF; Waw With Dot Above                   |
| | | | | |                                                                                                                      
|`U+06D0`   | Letter           | DUAL         | YEH                  | _0_        | &#x06D0; Dotless Yeh With Vertical 2 Dots Below|
|`U+06D1`   | Letter           | DUAL         | YEH                  | _0_        | &#x06D1; Dotless Yeh With 3 Dots Below        |
|`U+06D2`   | Letter           | RIGHT        | YEH_BARREE           | _0_        | &#x06D2; Yeh Barree                           |
|`U+06D3`   | Letter           | RIGHT        | YEH_BARREE           | _0_        | &#x06D3; Yeh Barree With Hamza Above          |
|`U+06D4`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x06D4; Full Stop                            |
|`U+06D5`   | Letter           | RIGHT        | TEH_MARBUTA          | _0_        | &#x06D5; Dotless Teh Marbuta                  |
|`U+06D6`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x06D6; Small High Sad Lam Alef Maksura      |
|`U+06D7`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x06D7; Small High Qaf Lam Alef Maksura      |
|`U+06D8`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x06D8; Small High Meem Initial Form         |
|`U+06D9`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x06D9; Small High Lam Alef                  |
|`U+06DA`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x06DA; Small High Jeem                      |
|`U+06DB`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x06DB; Small High Three Dots                |
|`U+06DC`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230_MCM    | &#x06DC; Small High Seen                      |
|`U+06DD`   | Other            | NON_JOINING  | _null_               | _0_        | &#x06DD; End Of Ayah                          |
|`U+06DE`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x06DE; Start Of Rub El Hizb                 |
|`U+06DF`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x06DF; Small High Rounded Zero              |
| | | | | |                                                                                                                      
|`U+06E0`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x06E0; Small High Upright Rectangular Zero  |
|`U+06E1`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x06E1; Small High Dotless Head Of Khah      |
|`U+06E2`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x06E2; Small High Meem Isolated Form        |
|`U+06E3`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220_MCM    | &#x06E3; Small Low Seen                       |
|`U+06E4`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x06E4; Small High Madda                     |
|`U+06E5`   | Letter modifier  | NON_JOINING  | _null_               | _0_        | &#x06E5; Small Waw                            |
|`U+06E6`   | Letter modifier  | NON_JOINING  | _null_               | _0_        | &#x06E6; Small Yeh                            |
|`U+06E7`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230_MCM    | &#x06E7; Small High Yeh                       |
|`U+06E8`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230_MCM    | &#x06E8; Small High Noon                      |
|`U+06E9`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x06E9; Place Of Sajdah                      |
|`U+06EA`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x06EA; Empty Centre Low Stop                |
|`U+06EB`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x06EB; Empty Centre High Stop               |
|`U+06EC`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x06EC; Rounded High Stop With Filled Centre |
|`U+06ED`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x06ED; Small Low Meem                       |
|`U+06EE`   | Letter           | RIGHT        | DAL                  | _0_        | &#x06EE; Dal With Inverted V Above            |
|`U+06EF`   | Letter           | RIGHT        | REH                  | _0_        | &#x06EF; Reh With Inverted V Above            |
| | | | | |                                                                                                                      
|`U+06F0`   | Number           | NON_JOINING  | _null_               | _0_        | &#x06F0; Extended Digit Zero                  |
|`U+06F1`   | Number           | NON_JOINING  | _null_               | _0_        | &#x06F1; Extended Digit One                   |
|`U+06F2`   | Number           | NON_JOINING  | _null_               | _0_        | &#x06F2; Extended Digit Two                   |
|`U+06F3`   | Number           | NON_JOINING  | _null_               | _0_        | &#x06F3; Extended Digit Three                 |
|`U+06F4`   | Number           | NON_JOINING  | _null_               | _0_        | &#x06F4; Extended Digit Four                  |
|`U+06F5`   | Number           | NON_JOINING  | _null_               | _0_        | &#x06F5; Extended Digit Five                  |
|`U+06F6`   | Number           | NON_JOINING  | _null_               | _0_        | &#x06F6; Extended Digit Six                   |
|`U+06F7`   | Number           | NON_JOINING  | _null_               | _0_        | &#x06F7; Extended Digit Seven                 |
|`U+06F8`   | Number           | NON_JOINING  | _null_               | _0_        | &#x06F8; Extended Digit Eight                 |
|`U+06F9`   | Number           | NON_JOINING  | _null_               | _0_        | &#x06F9; Extended Digit Nine                  |
|`U+06FA`   | Letter           | DUAL         | SEEN                 | _0_        | &#x06FA; Sheen With Dot Below                 |
|`U+06FB`   | Letter           | DUAL         | SAD                  | _0_        | &#x06FB; Dad With Dot Below                   |
|`U+06FC`   | Letter           | DUAL         | AIN                  | _0_        | &#x06FC; Ghain With Dot Below                 |
|`U+06FD`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x06FD; Sign Sindhi Ampersand                |
|`U+06FE`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x06FE; Sign Sindhi Postposition Men         |
|`U+06FF`   | Letter           | DUAL         | KNOTTED_HEH          | _0_        | &#x06FF; Knotted Heh With Inverted V Above    |          
:::


## Arabic Supplement character table ##


:::{table} Arabic Supplement block table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                                                           |
|:----------|:-----------------|:-------------|:---------------------|:-----------|-----------------------------------------------------------------|
|`U+0750`   | Letter           | DUAL         | BEH                  | _0_        | &#x0750; Dotless Beh With Horizontal 3 Dots Below               |
|`U+0751`   | Letter           | DUAL         | BEH                  | _0_        | &#x0751; Beh With 3 Dots Above                                  |
|`U+0752`   | Letter           | DUAL         | BEH                  | _0_        | &#x0752; Dotless Beh With Inverted 3 Dots Below                 |
|`U+0753`   | Letter           | DUAL         | BEH                  | _0_        | &#x0753; Dotless Beh With Inverted 3 Dots Below And 2 Dots Above|
|`U+0754`   | Letter           | DUAL         | BEH                  | _0_        | &#x0754; Dotless Beh With 2 Dots Below And Dot Above            |
|`U+0755`   | Letter           | DUAL         | BEH                  | _0_        | &#x0755; Dotless Beh With Inverted V Below                      |
|`U+0756`   | Letter           | DUAL         | BEH                  | _0_        | &#x0756; Dotless Beh With V Above                               |
|`U+0757`   | Letter           | DUAL         | HAH                  | _0_        | &#x0757; Hah With 2 Dots Above                                  |
|`U+0758`   | Letter           | DUAL         | HAH                  | _0_        | &#x0758; Hah With Inverted 3 Dots Below                         |
|`U+0759`   | Letter           | RIGHT        | DAL                  | _0_        | &#x0759; Dal With Vertical 2 Dots Below And Tah Above           |
|`U+075A`   | Letter           | RIGHT        | DAL                  | _0_        | &#x075A; Dal With Inverted V Below                              |
|`U+075B`   | Letter           | RIGHT        | REH                  | _0_        | &#x075B; Reh With Bar                                           |
|`U+075C`   | Letter           | DUAL         | SEEN                 | _0_        | &#x075C; Seen With 4 Dots Above                                 |
|`U+075D`   | Letter           | DUAL         | AIN                  | _0_        | &#x075D; Ain With 2 Dots Above                                  |
|`U+075E`   | Letter           | DUAL         | AIN                  | _0_        | &#x075E; Ain With Inverted 3 Dots Above                         |
|`U+075F`   | Letter           | DUAL         | AIN                  | _0_        | &#x075F; Ain With Vertical 2 Dots Above                         |
| | | | | |                                                                                                              
|`U+0760`   | Letter           | DUAL         | FEH                  | _0_        | &#x0760; Dotless Feh With 2 Dots Below                          |
|`U+0761`   | Letter           | DUAL         | FEH                  | _0_        | &#x0761; Dotless Feh With Inverted 3 Dots Below                 |
|`U+0762`   | Letter           | DUAL         | GAF                  | _0_        | &#x0762; Keheh With Dot Above                                   |
|`U+0763`   | Letter           | DUAL         | GAF                  | _0_        | &#x0763; Keheh With 3 Dots Above                                |
|`U+0764`   | Letter           | DUAL         | GAF                  | _0_        | &#x0764; Keheh With Inverted 3 Dots Below                       |
|`U+0765`   | Letter           | DUAL         | MEEM                 | _0_        | &#x0765; Meem With Dot Above                                    |
|`U+0766`   | Letter           | DUAL         | MEEM                 | _0_        | &#x0766; Meem With Dot Below                                    |
|`U+0767`   | Letter           | DUAL         | NOON                 | _0_        | &#x0767; Noon With 2 Dots Below                                 |
|`U+0768`   | Letter           | DUAL         | NOON                 | _0_        | &#x0768; Noon With Tah Above                                    |
|`U+0769`   | Letter           | DUAL         | NOON                 | _0_        | &#x0769; Noon With V Above                                      |
|`U+076A`   | Letter           | DUAL         | LAM                  | _0_        | &#x076A; Lam With Bar                                           |
|`U+076B`   | Letter           | RIGHT        | REH                  | _0_        | &#x076B; Reh With Vertical 2 Dots Above                         |
|`U+076C`   | Letter           | RIGHT        | REH                  | _0_        | &#x076C; Reh With Hamza Above                                   |
|`U+076D`   | Letter           | DUAL         | SEEN                 | _0_        | &#x076D; Seen With Vertical 2 Dots Above                        |
|`U+076E`   | Letter           | DUAL         | HAH                  | _0_        | &#x076E; Hah With Tah Below                                     |
|`U+076F`   | Letter           | DUAL         | HAH                  | _0_        | &#x076F; Hah With Tah And 2 Dots Below                          |
| | | | | |                                                                                                                      
|`U+0770`   | Letter           | DUAL         | SEEN                 | _0_        | &#x0770; Seen With 2 Dots And Tah Above                         |
|`U+0771`   | Letter           | RIGHT        | REH                  | _0_        | &#x0771; Reh With 2 Dots And Tah Above                          |
|`U+0772`   | Letter           | DUAL         | HAH                  | _0_        | &#x0772; Hah With Tah Above                                     |
|`U+0773`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0773; Alef With Digit Two Above                              |
|`U+0774`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0774; Alef With Digit Three Above                            |
|`U+0775`   | Letter           | DUAL         | FARSI_YEH            | _0_        | &#x0775; Farsi Yeh With Digit Two Above                         |
|`U+0776`   | Letter           | DUAL         | FARSI_YEH            | _0_        | &#x0776; Farsi Yeh With Digit Three Above                       |
|`U+0777`   | Letter           | DUAL         | YEH                  | _0_        | &#x0777; Dotless Yeh With Digit Four Below                      |
|`U+0778`   | Letter           | RIGHT        | WAW                  | _0_        | &#x0778; Waw With Digit Two Above                               |
|`U+0779`   | Letter           | RIGHT        | WAW                  | _0_        | &#x0779; Waw With Digit Three Above                             |
|`U+077A`   | Letter           | DUAL         | BURUSHASKI_YEH_BARREE| _0_        | &#x077A; Burushaski Yeh Barree With Digit Two Above             |
|`U+077B`   | Letter           | DUAL         | BURUSHASKI_YEH_BARREE| _0_        | &#x077B; Burushaski Yeh Barree With Digit Three Above           |
|`U+077C`   | Letter           | DUAL         | HAH                  | _0_        | &#x077C; Hah With Digit Four Below                              |
|`U+077D`   | Letter           | DUAL         | SEEN                 | _0_        | &#x077D; Seen With Digit Four Above                             |
|`U+077E`   | Letter           | DUAL         | SEEN                 | _0_        | &#x077E; Seen With Inverted V Above                             |
|`U+077F`   | Letter           | DUAL         | KAF                  | _0_        | &#x077F; Kaf With 2 Dots Above                                  |                        
:::


## Arabic Extended-A character table ##


:::{table} Arabic Extended-A block table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                                                 |
|:----------|:-----------------|:-------------|:---------------------|:-----------|-------------------------------------------------------|
|`U+08A0`   | Letter           | DUAL         | BEH                  | _0_        | &#x08A0; Dotless Beh With V Below                     |
|`U+08A1`   | Letter           | DUAL         | BEH                  | _0_        | &#x08A1; Beh With Hamza Above                         |
|`U+08A2`   | Letter           | DUAL         | HAH                  | _0_        | &#x08A2; Hah With Dot Below And 2 Dots Above          |
|`U+08A3`   | Letter           | DUAL         | TAH                  | _0_        | &#x08A3; Tah With 2 Dots Above                        |
|`U+08A4`   | Letter           | DUAL         | FEH                  | _0_        | &#x08A4; Dotless Feh With Dot Below And 3 Dots Above  |
|`U+08A5`   | Letter           | DUAL         | QAF                  | _0_        | &#x08A5; Qaf With Dot Below                           |
|`U+08A6`   | Letter           | DUAL         | LAM                  | _0_        | &#x08A6; Lam With Double Bar                          |
|`U+08A7`   | Letter           | DUAL         | MEEM                 | _0_        | &#x08A7; Meem With 3 Dots Above                       |
|`U+08A8`   | Letter           | DUAL         | YEH                  | _0_        | &#x08A8; Yeh With Hamza Above                         |
|`U+08A9`   | Letter           | DUAL         | YEH                  | _0_        | &#x08A9; Yeh With Dot Above                           |
|`U+08AA`   | Letter           | RIGHT        | REH                  | _0_        | &#x08AA; Reh With Loop                                |
|`U+08AB`   | Letter           | RIGHT        | WAW                  | _0_        | &#x08AB; Waw With Dot Within                          |
|`U+08AC`   | Letter           | RIGHT        | ROHINGYA_YEH         | _0_        | &#x08AC; Rohingya Yeh                                 |
|`U+08AD`   | Letter           | NON_JOINING  | _null_               | _0_        | &#x08AD; Low Alef                                     |
|`U+08AE`   | Letter           | RIGHT        | DAL                  | _0_        | &#x08AE; Dal With 3 Dots Below                        |
|`U+08AF`   | Letter           | DUAL         | SAD                  | _0_        | &#x08AF; Sad With 3 Dots Below                        |
| | | | | |                                                                                                              
|`U+08B0`   | Letter           | DUAL         | GAF                  | _0_        | &#x08B0; Keheh With Stroke Below                      |
|`U+08B1`   | Letter           | RIGHT        | STRAIGHT_WAW         | _0_        | &#x08B1; Straight Waw                                 |
|`U+08B2`   | Letter           | RIGHT        | REH                  | _0_        | &#x08B2; Reh With Dot And Inverted V Above            |
|`U+08B3`   | Letter           | DUAL         | AIN                  | _0_        | &#x08B3; Ain With 3 Dots Below                        |
|`U+08B4`   | Letter           | DUAL         | KAF                  | _0_        | &#x08B4; Kaf With Dot Below                           |
|`U+08B5`   | Letter           | DUAL         | QAF                  | _0_        | &#x08B5; Dotless Qaf With Dot Below                   |
|`U+08B6`   | Letter           | DUAL         | BEH                  | _0_        | &#x08B6; Beh With Meem Above                          |
|`U+08B7`   | Letter           | DUAL         | BEH                  | _0_        | &#x08B7; Dotless Beh With 3 Dots Below And Meem Above |
|`U+08B8`   | Letter           | DUAL         | BEH                  | _0_        | &#x08B8; Dotless Beh With Teh Above                   |
|`U+08B9`   | Letter           | RIGHT        | REH                  | _0_        | &#x08B9; Reh With Noon Above                          |
|`U+08BA`   | Letter           | DUAL         | YEH                  | _0_        | &#x08BA; Yeh With Noon Above                          |
|`U+08BB`   | Letter           | DUAL         | AFRICAN_FEH          | _0_        | &#x08BB; African Feh                                  |
|`U+08BC`   | Letter           | DUAL         | AFRICAN_QAF          | _0_        | &#x08BC; African Qaf                                  |
|`U+08BD`   | Letter           | DUAL         | AFRICAN_NOON         | _0_        | &#x08BD; African Noon                                 |
|`U+08BE`   | Letter           | DUAL         | BEH                  | _0_        | &#x08BE; Peh With Small V                             |
|`U+08BF`   | Letter           | DUAL         | BEH                  | _0_        | &#x08BF; Teh With Small V                             |
| | | | | |
|`U+08C0`   | Letter           | DUAL         | BEH                  | _0_        | &#x08C0; Tteh With Small V                            |
|`U+08C1`   | Letter           | DUAL         | HAH                  | _0_        | &#x08C1; Tcheh With Small V                           |
|`U+08C2`   | Letter           | DUAL         | GAF                  | _0_        | &#x08C2; Keheh With Small V                           |
|`U+08C3`   | Letter           | DUAL         | AIN                  | _0_        | &#x08C3; Ghain With 3 Dots Above                      |
|`U+08C4`   | Letter           | DUAL         | AFRICAN_QAF          | _0_        | &#x08C4; African Qaf With 3 Dots Above                |
|`U+08C5`   | Letter           | DUAL         | HAH                  | _0_        | &#x08C5; Jeem With 3 Dots Above                       |
|`U+08C6`   | Letter           | DUAL         | HAH                  | _0_        | &#x08C6; Jeem With 3 Dots Below                       |
|`U+08C7`   | Letter           | DUAL         | LAM                  | _0_        | &#x08C7; Lam With Small Arabic Tah Above              |
|`U+08C8`   | Letter           | DUAL         | GAF                  | _0_        | &#x08C8; Graf                                         |
|`U+08C9`   | Letter modifier  | TRANSPARENT  | _null_               | _0_        | &#x08C9; Small Farsi Yeh                              |
|`U+08CA`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230_MCM    | &#x08CA; Small High Farsi Yeh                         |
|`U+08CB`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230_MCM    | &#x08CB; Small High Yeh Barree With Two Dots Below    |
|`U+08CC`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08CC; Small High Word Sah                          |
|`U+08CD`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230_MCM    | &#x08CD; Small High Zah                               |
|`U+08CE`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230_MCM    | &#x08CE; Large Round Dot Above                        |
|`U+08CF`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220_MCM    | &#x08CF; Large Round Dot Below                        |
| | | | | |
|`U+08D0`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x08D0; Sukun Below                                  |
|`U+08D1`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x08D1; Large Circle Below                           |
|`U+08D2`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x08D2; Large Round Dot Inside Circle Below          |
|`U+08D3`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220_MCM    | &#x08D3; Small Low Waw                                |
|`U+08D4`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08D4; Small High Word Ar-Rub                       |
|`U+08D5`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08D5; Small High Sad                               |
|`U+08D6`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08D6; Small High Ain                               |
|`U+08D7`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08D7; Small High Qaf                               |
|`U+08D8`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08D8; Small High Noon With Kasra                   |
|`U+08D9`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08D9; Small Low Noon With Kasra                    |
|`U+08DA`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08DA; Small High Word Ath-Thalatha                 |
|`U+08DB`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08DB; Small High Word As-Sajda                     |
|`U+08DC`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08DC; Small High Word An-Nisf                      |
|`U+08DD`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08DD; Small High Word Sakta                        |
|`U+08DE`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08DE; Small High Word Qif                          |
|`U+08DF`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08DF; Small High Word Waqfa                        |
| | | | | | 
|`U+08E0`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08E0; Small High Footnote Marker                   |
|`U+08E1`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08E1; Small High Sign Safha                        |
|`U+08E2`   | Other            | NON_JOINING  | _null_               | _0_        | &#x08E2; Disputed End Of Ayah                         |
|`U+08E3`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x08E3; Turned Damma Below                           |
|`U+08E4`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08E4; Curly Fatha                                  |
|`U+08E5`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08E5; Curly Damma                                  |
|`U+08E6`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x08E6; Curly Kasra                                  |
|`U+08E7`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08E7; Curly Fathatan                               |
|`U+08E8`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08E8; Curly Dammatan                               |
|`U+08E9`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x08E9; Curly Kasratan                               |
|`U+08EA`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08EA; Tone One Dot Above                           |
|`U+08EB`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08EB; Tone Two Dots Above                          |
|`U+08EC`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08EC; Tone Loop Above                              |
|`U+08ED`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x08ED; Tone One Dot Below                           |
|`U+08EE`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x08EE; Tone Two Dots Below                          |
|`U+08EF`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x08EF; Tone Loop Below                              |
| | | | | |                                                                                                                      
|`U+08F0`   | Mark [Mn]        | TRANSPARENT  | _null_               | 27         | &#x08F0; Open Fathatan                                |
|`U+08F1`   | Mark [Mn]        | TRANSPARENT  | _null_               | 28         | &#x08F1; Open Dammatan                                |
|`U+08F2`   | Mark [Mn]        | TRANSPARENT  | _null_               | 29         | &#x08F2; Open Kasratan                                |
|`U+08F3`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230_MCM    | &#x08F3; Small High Waw                               |
|`U+08F4`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08F4; Fatha With Ring                              |
|`U+08F5`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08F5; Fatha With Dot Above                         |
|`U+08F6`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x08F6; Kasra With Dot Below                         |
|`U+08F7`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08F7; Left Arrowhead Above                         |
|`U+08F8`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08F8; Right Arrowhead Above                        |
|`U+08F9`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x08F9; Left Arrowhead Below                         |
|`U+08FA`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x08FA; Right Arrowhead Below                        |
|`U+08FB`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08FB; Double Right Arrowhead Above                 |
|`U+08FC`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08FC; Double Right Arrowhead Above With Dot        |
|`U+08FD`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08FD; Right Arrowhead Above With Dot               |
|`U+08FE`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08FE; Damma With Dot                               |
|`U+08FF`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x08FF; Mark Sideways Noon Ghunna                    |          
:::


## Arabic Extended-B character table ##


:::{table} Arabic Extended-B block table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                                                 |
|:----------|:-----------------|:-------------|:---------------------|:-----------|-------------------------------------------------------|
|`U+0870`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0870; Alef With Attached Fatha                     |
|`U+0871`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0871; Alef With Attached Top Right Fatha           |
|`U+0872`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0872; Alef With Right Middle Stroke                |
|`U+0873`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0873; Alef With Left Middle Stroke                 |
|`U+0874`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0874; Alef With Attached Kasra                     |
|`U+0875`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0875; Alef With Attached Bottom Right Kasra        |
|`U+0876`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0876; Alef With Attached Round Dot Above           |
|`U+0877`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0877; Alef With Attached Right Round Dot           |
|`U+0878`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0878; Alef With Attached Left Round Dot            |
|`U+0879`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0879; Alef With Attached Round Dot Below           |
|`U+087A`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x087A; Alef With Dot Above                          |
|`U+087B`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x087B; Alef With Attached Top Right Fatha And Dot Above|
|`U+087C`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x087C; Alef With Right Middle Stroke And Dot Above  |
|`U+087D`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x087D; Alef With Attached Bottom Right Kasra And Dot Above|
|`U+087E`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x087E; Alef With Attached Top Right Fatha And Left Ring|
|`U+087F`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x087F; Alef With Right Middle Stroke And Left Ring  |
| | | | | |
|`U+0880`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0880; Alef With Attached Bottom Right Kasra And Left Ring|
|`U+0881`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0881; Alef With Attached Right Hamza               |
|`U+0882`   | Letter           | RIGHT        | ALEF                 | _0_        | &#x0882; Alef With Attached Left Hamza                |
|`U+0883`   | Letter modifier  | JOIN_CAUSING | _null_               | _0_        | &#x0883; Tatweel With Overstruck Hamza                |
|`U+0884`   | Letter modifier  | JOIN_CAUSING | _null_               | _0_        | &#x0884; Tatweel With Overstruck Waw                  |
|`U+0885`   | Letter modifier  | JOIN_CAUSING | _null_               | _0_        | &#x0885; Tatweel With Two Dots Below                  |
|`U+0886`   | Letter           | DUAL         | THIN_YEH             | _0_        | &#x0886; Thin Yeh                                     |
|`U+0887`   | Letter           | NON_JOINING  | _null_               | _0_        | &#x0887; Baseline Round Dot                           |
|`U+0888`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x0888; Raised Round Dot                             |
|`U+0889`   | Letter           | DUAL         | NOON                 | _0_        | &#x0889; Noon With Inverted Small V                   |
|`U+088A`   | Letter           | DUAL         | HAH                  | _0_        | &#x088A; Hah With Inverted Small V Below              |
|`U+088B`   | Letter           | DUAL         | TAH                  | _0_        | &#x088B; Tah With Dot Below                           |
|`U+088C`   | Letter           | DUAL         | TAH                  | _0_        | &#x088C; Tah With Three Dots Below                    |
|`U+088D`   | Letter           | DUAL         | GAF                  | _0_        | &#x088D; Keheh With Two Dots Vertically Below         |
|`U+088E`   | Letter           | RIGHT        | VERTICAL_TAIL        | _0_        | &#x088E; Vertical Tail                                |
|`U+088F`   | _unassigned_     |              |                      |            |                                                       |
| | | | | |
|`U+0890`   | Other            | NON_JOINING  | _null_               | _0_        | &#x0890; Pound Mark Above                             |
|`U+0891`   | Other            | NON_JOINING  | _null_               | _0_        | &#x0891; Piastre Mark Above                           |
|`U+0892`   | _unassigned_     |              |                      |            |                                                       |
|`U+0893`   | _unassigned_     |              |                      |            |                                                       |
|`U+0894`   | _unassigned_     |              |                      |            |                                                       |
|`U+0895`   | _unassigned_     |              |                      |            |                                                       |
|`U+0896`   | _unassigned_     |              |                      |            |                                                       |
|`U+0897`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0897; Pepet                                        |
|`U+0898`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0898; Small High Word Al-Juz                       |
|`U+0899`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x0899; Small Low Word Ishmaam                       |
|`U+089A`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x089A; Small Low Word Imaala                        |
|`U+089B`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x089B; Small Low Word Tasheel                       |
|`U+089C`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x089C; Madda Waajib                                 |
|`U+089D`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x089D; Superscript Alef Mokhassas                   |
|`U+089E`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x089E; Doubled Madda                                |
|`U+089F`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x089F; Half Madda Over Madda                        |
| | | | | |
:::


## Arabic Extended-C character table ##


:::{table} Arabic Extended-C block table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                                                 |
|:----------|:-----------------|:-------------|:---------------------|:-----------|-------------------------------------------------------|
|`U+10EC0`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EC1`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EC2`  | Letter           | RIGHT        | DAL                  | _0_        | &#x10EC2; Dal With Two Dots Vertically Below          |
|`U+10EC3`  | Letter           | DUAL         | TAH                  | _0_        | &#x10EC3; Tah With Two Dots Vertically Below          |
|`U+10EC4`  | Letter           | DUAL         | KAF                  | _0_        | &#x10EC4; Kaf With Two Dots Vertically Below          |
|`U+10EC5`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EC6`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EC7`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EC8`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EC9`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ECA`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ECB`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ECC`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ECD`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ECE`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ECF`  | _unassigned_     |              |                      |            |                                                       |
| | | | | |
|`U+10ED0`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ED1`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ED2`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ED3`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ED4`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ED5`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ED6`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ED7`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ED8`  | _unassigned_     |              |                      |            |                                                       |
|`U+10ED9`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EDA`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EDB`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EDC`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EDD`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EDE`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EDF`  | _unassigned_     |              |                      |            |                                                       |
| | | | | |
|`U+10EE0`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EE1`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EE2`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EE3`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EE4`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EE5`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EE6`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EE7`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EE8`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EE9`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EEA`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EEB`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EEC`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EED`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EEE`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EEF`  | _unassigned_     |              |                      |            |                                                       |
| | | | | |
|`U+10EF0`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EF1`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EF2`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EF3`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EF4`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EF5`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EF6`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EF7`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EF8`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EF9`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EFA`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EFB`  | _unassigned_     |              |                      |            |                                                       |
|`U+10EFC`  | Mark [Mn]        | TRANSPARENT  | _null_               | _0_        | &#x10EFC; Combining Alef Overlay                      |
|`U+10EFD`  | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x10EFD; Small Low Word Sakta                        |
|`U+10EFE`  | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x10EFE; Small Low Word Qasr                         |
|`U+10EFF`  | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x10EFF; Small Low Word Madda                        |
| | | | | |
:::


## Rumi Numeral Symbols character table ##

:::{table} Rumi Numeral Symbols block table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                          |
|:----------|:-----------------|:-------------|:---------------------|:-----------|--------------------------------|
|`U+10E60`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E60; Digit One            |
|`U+10E61`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E61; Digit Two            |
|`U+10E62`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E62; Digit Three          |
|`U+10E63`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E63; Digit Four           |
|`U+10E64`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E64; Digit Five           |
|`U+10E65`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E65; Digit Six            |
|`U+10E66`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E66; Digit Seven          |
|`U+10E67`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E67; Digit Eight          |
|`U+10E68`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E68; Digit Nine           |
|`U+10E69`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E69; Number Ten           |
|`U+10E6A`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E6A; Number Twenty        |
|`U+10E6B`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E6B; Number Thirty        |
|`U+10E6C`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E6C; Number Forty         |
|`U+10E6D`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E6D; Number Fifty         |
|`U+10E6E`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E6E; Number Sixty         |
|`U+10E6F`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E6F; Number Seventy       |
| | | | | |                                                                                          
|`U+10E70`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E70; Number Eighty        |
|`U+10E71`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E71; Number Ninety        |
|`U+10E72`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E72; Number One Hundred   |
|`U+10E73`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E73; Number Two Hundred   |
|`U+10E74`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E74; Number Three Hundred |
|`U+10E75`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E75; Number Four Hundred  |
|`U+10E76`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E76; Number Five Hundred  |
|`U+10E77`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E77; Number Six Hundred   |
|`U+10E78`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E78; Number Seven Hundred |
|`U+10E79`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E79; Number Eight Hundred |
|`U+10E7A`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E7A; Number Nine Hundred  |
|`U+10E7B`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E7B; Fraction One Half    |
|`U+10E7C`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E7C; Fraction One Quarter |
|`U+10E7D`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E7D; Fraction One Third   |
|`U+10E7E`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E7E; Fraction Two Thirds  |
|`U+10E7F`  | _unassigned_     |              |                      |            |                                |
:::


<!--- 
## Arabic Mathematical Alphabetic Symbols character table ##

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                          |
|:----------|:-----------------|:-------------|:---------------------|:-----------|--------------------------------|
|`U+10E60`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E60; Digit One            |
|`U+10E61`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E61; Digit Two            |
|`U+10E62`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E62; Digit Three          |
|`U+10E63`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E63; Digit Four           |
|`U+10E64`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E64; Digit Five           |
|`U+10E65`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E65; Digit Six            |
|`U+10E66`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E66; Digit Seven          |
|`U+10E67`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E67; Digit Eight          |
|`U+10E68`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E68; Digit Nine           |
|`U+10E69`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E69; Number Ten           |
|`U+10E6A`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E6A; Number Twenty        |
|`U+10E6B`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E6B; Number Thirty        |
|`U+10E6C`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E6C; Number Forty         |
|`U+10E6D`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E6D; Number Fifty         |
|`U+10E6E`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E6E; Number Sixty         |
|`U+10E6F`  | Number           | NON_JOINING  | _null_               | _0_        | &#x10E6F; Number Seventy       |
| | | | | |                                                                                          

--->


## Miscellaneous character table ##

Other important characters that may be encountered when shaping runs
of Arabic text include the dotted-circle placeholder (`U+25CC`), the
combining grapheme joiner (`U+034F`), the zero-width joiner (`U+200D`)
and zero-width non-joiner (`U+200C`), the left-to-right mark
(`U+200E`) and right-to-left mark (`U+200F`), and the no-break
space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
combining mark in isolation. Real-world text syllables may also use
other characters, such as hyphens or dashes, in a similar placeholder
fashion; shaping engines should cope with this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                          |
|:----------|:-----------------|:-------------|:---------------------|:-----------|--------------------------------|
|`U+00A0`   | Separator        | NON_JOINING  | _null_               | _0_        | &#x00A0; No-break space        |
|`U+034F`   | Other            | NON_JOINING  | _null_               | _0_        | &#x034F; Combining grapheme joiner |
|`U+200C`   | Other            | NON_JOINING  | _null_               | _0_        | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOIN_CAUSING | _null_               | _0_        | &#x200D; Zero-width joiner     |
|`U+200E`   | Other            | NON_JOINING  | _null_               | _0_        | &#x200E; Left-to-Right mark    |
|`U+200F`   | Other            | NON_JOINING  | _null_               | _0_        | &#x200F; Right-to-Left mark    |
|`U+2010`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2013; En dash               |
|`U+2014`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x25CC; Dotted circle         |
:::


The combining grapheme joiner (<abbr>CGJ</abbr>) is primarily used to alter the
order in which adjacent marks are positioned during the
mark-reordering stage, in order to adhere to the needs of a
non-default language orthography.
<!--- combining grapheme joiner explanation --->

The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to force the usage of the
cursive connecting form of a letter even when the context of the
adjoining letters would not trigger the connecting form. 

For example, to show the initial form of a letter in isolation (such
as for displaying it in a table of forms), the sequence "_Letter_,ZWJ"
would be used. To show the medial form of a letter in isolation, the
sequence "ZWJ,_Letter_,ZWJ" would be used.


<!--- Zero-Width Non Joiner explanation --->

The right-to-left mark (<abbr>RLM</abbr>) and left-to-right mark (<abbr>LRM</abbr>) are used by
the Unicode bidirectionality algorithm (BiDi) to indicate the points
in a text run at which the writing direction changes.


<!--- How shaping is affected by the <abbr title="Left-To-Right">LTR</abbr> and <abbr title="Right-To-Left">RTL</abbr> markers explanation --->


The no-break space is primarily used to display those codepoints that
are defined as non-spacing (such as vowel or diacritical marks and "Hamza") in an
isolated context, as an alternative to displaying them superimposed on
the dotted-circle placeholder.


================================================
FILE: character-tables/character-tables-bengali.md
================================================
# Bengali character tables #

This document lists the per-character shaping information needed to
[shape Bengali text](../opentype-shaping-bengali.md).

**Contents**

  - [Bengali character table](#bengali-character-table)
  - [Vedic Extensions character table](#vedic-extensions-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)
	  

## Bengali character table ##

Bengali glyphs should be classified as in the following
table. Codepoints in the Bengali block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as currency marks,
punctuation, and other symbols.

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


:::{table} Bengali character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0980`   | Letter           | CONSONANT_PLACEHOLDER | _null_                 | &#x0980; Anji                |
|`U+0981`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0981; Candrabindu         |
|`U+0982`   | Mark [Mc]        | BINDU             | RIGHT_POSITION             | &#x0982; Anusvara            |
|`U+0983`   | Mark [Mc]        | VISARGA           | RIGHT_POSITION             | &#x0983; Visarga             |
|`U+0984`   | _unassigned_     |                   |                            |                              |
|`U+0985`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0985; A                   |
|`U+0986`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0986; Aa                  |
|`U+0987`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0987; I                   |
|`U+0988`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0988; Ii                  |
|`U+0989`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0989; U                   |
|`U+098A`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x098A; Uu                  |
|`U+098B`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x098B; Vocalic R           |
|`U+098C`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x098C; Vocalic L           |
|`U+098D`   | _unassigned_     |                   |                            |                              |
|`U+098E`   | _unassigned_     |                   |                            |                              |
|`U+098F`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x098F; E                   |
| | | | |																	   
|`U+0990`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0990; Ai                  |
|`U+0991`   | _unassigned_     |                   |                            |                              |
|`U+0992`   | _unassigned_     |                   |                            |                              |
|`U+0993`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0993; O                   |
|`U+0994`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0994; Au                  |
|`U+0995`   | Letter           | CONSONANT         | _null_                     | &#x0995; Ka                  |
|`U+0996`   | Letter           | CONSONANT         | _null_                     | &#x0996; Kha                 |
|`U+0997`   | Letter           | CONSONANT         | _null_                     | &#x0997; Ga                  |
|`U+0998`   | Letter           | CONSONANT         | _null_                     | &#x0998; Gha                 |
|`U+0999`   | Letter           | CONSONANT         | _null_                     | &#x0999; Nga                 |
|`U+099A`   | Letter           | CONSONANT         | _null_                     | &#x099A; Ca                  |
|`U+099B`   | Letter           | CONSONANT         | _null_                     | &#x099B; Cha                 |
|`U+099C`   | Letter           | CONSONANT         | _null_                     | &#x099C; Ja                  |
|`U+099D`   | Letter           | CONSONANT         | _null_                     | &#x099D; Jha                 |
|`U+099E`   | Letter           | CONSONANT         | _null_                     | &#x099E; Nya                 |
|`U+099F`   | Letter           | CONSONANT         | _null_                     | &#x099F; Tta                 |
| | | | |																	   
|`U+09A0`   | Letter           | CONSONANT         | _null_                     | &#x09A0; Ttha                |
|`U+09A1`   | Letter           | CONSONANT         | _null_                     | &#x09A1; Dda                 |
|`U+09A2`   | Letter           | CONSONANT         | _null_                     | &#x09A2; Ddha                |
|`U+09A3`   | Letter           | CONSONANT         | _null_                     | &#x09A3; Nna                 |
|`U+09A4`   | Letter           | CONSONANT         | _null_                     | &#x09A4; Ta                  |
|`U+09A5`   | Letter           | CONSONANT         | _null_                     | &#x09A5; Tha                 |
|`U+09A6`   | Letter           | CONSONANT         | _null_                     | &#x09A6; Da                  |
|`U+09A7`   | Letter           | CONSONANT         | _null_                     | &#x09A7; Dha                 |
|`U+09A8`   | Letter           | CONSONANT         | _null_                     | &#x09A8; Na                  |
|`U+09A9`   | _unassigned_     |                   |                            |                              |
|`U+09AA`   | Letter           | CONSONANT         | _null_                     | &#x09AA; Pa                  |
|`U+09AB`   | Letter           | CONSONANT         | _null_                     | &#x09AB; Pha                 |
|`U+09AC`   | Letter           | CONSONANT         | _null_                     | &#x09AC; Ba                  |
|`U+09AD`   | Letter           | CONSONANT         | _null_                     | &#x09AD; Bha                 |
|`U+09AE`   | Letter           | CONSONANT         | _null_                     | &#x09AE; Ma                  |
|`U+09AF`   | Letter           | CONSONANT         | _null_                     | &#x09AF; Ya                  |
| | | | |																	    
|`U+09B0`   | Letter           | CONSONANT         | _null_                     | &#x09B0; Ra                  |
|`U+09B1`   | _unassigned_     |                   |                            |                              |
|`U+09B2`   | Letter           | CONSONANT         | _null_                     | &#x09B2; La                  |
|`U+09B3`   | _unassigned_     |                   |                            |                              |
|`U+09B4`   | _unassigned_     |                   |                            |                              |
|`U+09B5`   | _unassigned_     |                   |                            |                              |
|`U+09B6`   | Letter           | CONSONANT         | _null_                     | &#x09B6; Sha                 |
|`U+09B7`   | Letter           | CONSONANT         | _null_                     | &#x09B7; Ssa                 |
|`U+09B8`   | Letter           | CONSONANT         | _null_                     | &#x09B8; Sa                  |
|`U+09B9`   | Letter           | CONSONANT         | _null_                     | &#x09B9; Ha                  |
|`U+09BA`   | _unassigned_     |                   |                            |                              |
|`U+09BB`   | _unassigned_     |                   |                            |                              |
|`U+09BC`   | Mark [Mn]        | NUKTA             | BOTTOM_POSITION            | &#x09BC; Nukta               |
|`U+09BD`   | Letter           | AVAGRAHA          | _null_                     | &#x09BD; Avagraha            |
|`U+09BE`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x09BE; Sign Aa             |
|`U+09BF`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x09BF; Sign I              |
| | | | |																	   
|`U+09C0`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x09C0; Sign Ii             |
|`U+09C1`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x09C1; Sign U              |
|`U+09C2`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x09C2; Sign Uu             |
|`U+09C3`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x09C3; Sign Vocalic R      |
|`U+09C4`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x09C4; Sign Vocalic Rr     |
|`U+09C5`   | _unassigned_     |                   |                            |                              |
|`U+09C6`   | _unassigned_     |                   |                            |                              |
|`U+09C7`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x09C7; Sign E              |
|`U+09C8`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x09C8; Sign Ai             |
|`U+09C9`   | _unassigned_     |                   |                            |                              |
|`U+09CA`   | _unassigned_     |                   |                            |                              |
|`U+09CB`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_AND_RIGHT_POSITION    | &#x09CB; Sign O              |
|`U+09CC`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_AND_RIGHT_POSITION    | &#x09CC; Sign Au             |
|`U+09CD`   | Mark [Mn]        | VIRAMA            | BOTTOM_POSITION            | &#x09CD; Virama              |
|`U+09CE`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x09CE; Khanda Ta           |
|`U+09CF`   | _unassigned_     |                   |                            |                              |
| | | | |																	   
|`U+09D0`   | _unassigned_     |                   |                            |                              |
|`U+09D1`   | _unassigned_     |                   |                            |                              |
|`U+09D2`   | _unassigned_     |                   |                            |                              |
|`U+09D3`   | _unassigned_     |                   |                            |                              |
|`U+09D4`   | _unassigned_     |                   |                            |                              |
|`U+09D5`   | _unassigned_     |                   |                            |                              |
|`U+09D6`   | _unassigned_     |                   |                            |                              |
|`U+09D7`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x09D7; Au Length Mark      |
|`U+09D8`   | _unassigned_     |                   |                            |                              |
|`U+09D9`   | _unassigned_     |                   |                            |                              |
|`U+09DA`   | _unassigned_     |                   |                            |                              |
|`U+09DB`   | _unassigned_     |                   |                            |                              |
|`U+09DC`   | Letter           | CONSONANT         | _null_                     | &#x09DC; Rra                 |
|`U+09DD`   | Letter           | CONSONANT         | _null_                     | &#x09DD; Rha                 |
|`U+09DE`   | _unassigned_     |                   |                            |                              |
|`U+09DF`   | Letter           | CONSONANT         | _null_                     | &#x09DF; Yya                 |
| | | | |																	   
|`U+09E0`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x09E0; Vocalic Rr          |
|`U+09E1`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x09E1; Vocalic Ll          |
|`U+09E2`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x09E2; Sign Vocalic L      |
|`U+09E3`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x09E3; Sign Vocalic Ll     |
|`U+09E4`   | _unassigned_     |                   |                            |                              |
|`U+09E5`   | _unassigned_     |                   |                            |                              |
|`U+09E6`   | Number           | NUMBER            | _null_                     | &#x09E6; Digit Zero          |
|`U+09E7`   | Number           | NUMBER            | _null_                     | &#x09E7; Digit One           |
|`U+09E8`   | Number           | NUMBER            | _null_                     | &#x09E8; Digit Two           |
|`U+09E9`   | Number           | NUMBER            | _null_                     | &#x09E9; Digit Three         |
|`U+09EA`   | Number           | NUMBER            | _null_                     | &#x09EA; Digit Four          |
|`U+09EB`   | Number           | NUMBER            | _null_                     | &#x09EB; Digit Five          |
|`U+09EC`   | Number           | NUMBER            | _null_                     | &#x09EC; Digit Six           |
|`U+09ED`   | Number           | NUMBER            | _null_                     | &#x09ED; Digit Seven         |
|`U+09EE`   | Number           | NUMBER            | _null_                     | &#x09EE; Digit Eight         |
|`U+09EF`   | Number           | NUMBER            | _null_                     | &#x09EF; Digit Nine          |
| | | | |
|`U+09F0`   | Letter           | CONSONANT         | _null_                     | &#x09F0; Assamese Ra         |
|`U+09F1`   | Letter           | CONSONANT         | _null_                     | &#x09F1; Assamese Wa         |
|`U+09F2`   | Symbol           | SYMBOL            | _null_                     | &#x09F2; Rupee Mark          |
|`U+09F3`   | Symbol           | SYMBOL            | _null_                     | &#x09F3; Rupee Sign          |
|`U+09F4`   | Number           | NUMBER            | _null_                     | &#x09F4; Numerator One       |
|`U+09F5`   | Number           | NUMBER            | _null_                     | &#x09F5; Numerator Two       |
|`U+09F6`   | Number           | NUMBER            | _null_                     | &#x09F6; Numerator Three     |
|`U+09F7`   | Number           | NUMBER            | _null_                     | &#x09F7; Numerator Four      |
|`U+09F8`   | Number           | NUMBER            | _null_                     | &#x09F8; Numerator One Less Than Denominator |
|`U+09F9`   | Number           | NUMBER            | _null_                     | &#x09F9; Denominator Sixteen |
|`U+09FA`   | Symbol           | SYMBOL            | _null_                     | &#x09FA; Isshar              |
|`U+09FB`   | Symbol           | SYMBOL            | _null_                     | &#x09FB; Ganda Mark          |
|`U+09FC`   | Letter           | _null_            | _null_                     | &#x09FC; Vedic Anusvara      |
|`U+09FD`   | Punctuation      | _null_            | _null_                     | &#x09FD; Abbreviation Sign   |
|`U+09FE`   | Mark [Mn]        | SYLLABLE_MODIFIER | TOP_POSITION               | &#x09FE; Sandhi Mark         |
|`U+09FF`   | _unassigned_     |                   |                            |                              |
:::


## Vedic Extensions character table ##

Sanskrit runs written in the Bengali script may also include
characters from the Vedic Extensions block. These characters should be
classified as follows.

> Note: See the [Vedic Extensions](../opentype-shaping-vedic-extensions.md) 
> document for additional information.


:::{table} Vedic Extensions character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1CD0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD0; Tone Karshana       |
|`U+1CD1`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD1; Tone Shara          |
|`U+1CD2`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD2; Tone Prenkha        |
|`U+1CD3`   | Punctuation      | _null_            | _null_                     | &#x1CD3; Sign Nihshvasa      |
|`U+1CD4`   | Mark [Mn]        | CANTILLATION      | OVERSTRUCK                 | &#x1CD4; Tone Midline Svarita |
|`U+1CD5`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD5; Tone Aggravated Independent Svarita |
|`U+1CD6`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD6; Tone Independent Svarita |
|`U+1CD7`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD7; Tone Kathaka Independent Svarita |
|`U+1CD8`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD8; Tone Candra Below   |
|`U+1CD9`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD9; Tone Kathaka Independent Svarita Schroeder |
|`U+1CDA`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDA; Tone Double Svarita |
|`U+1CDB`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDB; Tone Triple Svarita |
|`U+1CDC`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDC; Tone Kathaka Anudatta |
|`U+1CDD`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDD; Tone Dot Below      |
|`U+1CDE`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDE; Tone Two Dots Below |
|`U+1CDF`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDF; Tone Three Dots Below |
| | | | |																		
|`U+1CE0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CE0; Tone Rigvedic Kashmiri Independent Svarita |
|`U+1CE1`   | Mark [Mc]        | CANTILLATION      | RIGHT_POSITION             | &#x1CE1; Tone Atharvavedic Independent Svarita |
|`U+1CE2`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE2; Sign Visarga Svarita |
|`U+1CE3`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE3; Sign Visarga Udatta |
|`U+1CE4`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE4; Sign Reversed Visarga Udatta |
|`U+1CE5`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE5; Sign Visarga Anudatta |
|`U+1CE6`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE6; Sign Reversed Visarga Anudatta |
|`U+1CE7`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE7; Sign Visarga Udatta With Tail |
|`U+1CE8`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE8; Sign Visarga Anudatta With Tail |
|`U+1CE9`   | Letter           | SYMBOL            | _null_                     | &#x1CE9; Sign Anusvara Antargomukha |
|`U+1CEA`   | Letter           | _null_            | _null_                     | &#x1CEA; Sign Anusvara Bahirgomukha |
|`U+1CEB`   | Letter           | _null_            | _null_                     | &#x1CEB; Sign Anusvara Vamagomukha |
|`U+1CEC`   | Letter           | SYMBOL            | _null_                     | &#x1CEC; Sign Anusvara Vamagomukha With Tail |
|`U+1CED`   | Mark [Mn]        | AVAGRAHA          | BOTTOM_POSITION            | &#x1CED; Sign Tiryak         |
|`U+1CEE`   | Letter           | SYMBOL            | _null_                     | &#x1CEE; Sign Hexiform Long Anusvara |
|`U+1CEF`   | Letter           | _null_            | _null_                     | &#x1CEF; Sign Long Anusvara  |
| | | | |																		
|`U+1CF0`   | Letter           | _null_            | _null_                     | &#x1CF0; Sign Rthang Long Anusvara |
|`U+1CF1`   | Letter           | _null_            | _null_                     | &#x1CF1; Sign Anusvara Ubhayato Mukha |
|`U+1CF2`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF2; Sign Ardhavisarga   |
|`U+1CF3`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF3; Sign Rotated Ardhavisarga |
|`U+1CF4`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CF4; Tone Candra Above   |
|`U+1CF5`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF5; Sign Jihvamuliya    |
|`U+1CF6`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF6; Sign Upadhmaniya    |
|`U+1CF7`   | Mark [Mc]        | _null_            | _null_                     | &#x1CF7; Sign Atikrama       |
|`U+1CF8`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF8; Tone Ring Above     |
|`U+1CF9`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF9; Tone Double Ring Above |
|`U+1CFA`   | Letter           | PLACEHOLDER       | _null_                     | &#x1CFA; Sign Double Anusvara Antargomukha |
|`U+1CFB`   | _unassigned_     |                   |                            |                              |
|`U+1CFC`   | _unassigned_     |                   |                            |                              |
|`U+1CFD`   | _unassigned_     |                   |                            |                              |
|`U+1CFE`   | _unassigned_     |                   |                            |                              |
|`U+1CFF`   | _unassigned_     |                   |                            |                              |
:::


## Miscellaneous character table ##

In addition to general punctuation, runs of Bengali text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block. Bengali text can also incorporate the udatta
(`U+0951`) and anudatta (`U+0952`) signs from the Devanagari block.


:::{table} Additional punctuation character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+0951`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x0951; Udatta              |
|`U+0952`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x0952; Anudatta            |
|`U+0964`   | Punctuation      | _null_            | _null_                     | &#x0964; Danda               |
|`U+0965`   | Punctuation      | _null_            | _null_                     | &#x0965; Double Danda        |
:::


Other important characters that may be encountered when shaping runs
of Bengali text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2013; En dash               |
|`U+2014`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
:::

The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a "_Consonant_,Halant,_Consonant_" sequence. The
sequence "_Consonant_,Halant,ZWJ,_Consonant_" blocks the formation of
a conjunct between the two consonants. 

Note, however, that the "_Consonant_,Halant" subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead. The
sequence "_Consonant_,Halant,ZWNJ,_Consonant_" should produce the
first consonant in its standard form, followed by an explicit
"Halant".

A secondary usage of the zero-width joiner is to prevent the formation of
"Reph". An initial "Ra,Halant,ZWJ" sequence should not produce a "Reph",
where an initial "Ra,Halant" sequence without the zero-width joiner
otherwise would.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match "NBSP,ZWJ,Halant,_Consonant_", "NBSP,_mark_", or "NBSP,_matra_".


================================================
FILE: character-tables/character-tables-devanagari.md
================================================
# Devanagari character tables #

This document lists the per-character shaping information needed to
[shape Devanagari text](../opentype-shaping-devanagari.md).

**Contents**

  - [Devanagari character table](#devanagari-character-table)
  - [Devanagari Extended character table](#devanagari-extended-character-table)
  - [Devanagari Extended-A character table](#devanagari-extended-a-character-table)
  - [Vedic Extensions character table](#vedic-extensions-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)
	  

## Devanagari character table ##

Devanagari glyphs should be classified as in the following
table. Codepoints in the Devanagari block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as currency marks,
punctuation, and other symbols.

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


:::{table} Devanagari character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0900`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0900; Inverted Candrabindu|
|`U+0901`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0901; Candrabindu         |
|`U+0902`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0902; Anusvara            |
|`U+0903`   | Mark [Mc]        | VISARGA           | RIGHT_POSITION             | &#x0903; Visarga             |
|`U+0904`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0904; Short A             |
|`U+0905`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0905; A                   |
|`U+0906`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0906; Aa                  |
|`U+0907`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0907; I                   |
|`U+0908`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0908; Ii                  |
|`U+0909`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0909; U                   |
|`U+090A`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x090A; Uu                  |
|`U+090B`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x090B; Vocalic R           |
|`U+090C`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x090C; Vocalic L           |
|`U+090D`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x090D; Candra E            |
|`U+090E`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x090E; Short E             |
|`U+090F`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x090F; E                   |
| | | | |
|`U+0910`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0910; Ai                  |
|`U+0911`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0911; Candra O            |
|`U+0912`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0912; Short O             |
|`U+0913`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0913; O                   |
|`U+0914`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0914; Au                  |
|`U+0915`   | Letter           | CONSONANT         | _null_                     | &#x0915; Ka                  |
|`U+0916`   | Letter           | CONSONANT         | _null_                     | &#x0916; Kha                 |
|`U+0917`   | Letter           | CONSONANT         | _null_                     | &#x0917; Ga                  |
|`U+0918`   | Letter           | CONSONANT         | _null_                     | &#x0918; Gha                 |
|`U+0919`   | Letter           | CONSONANT         | _null_                     | &#x0919; Nga                 |
|`U+091A`   | Letter           | CONSONANT         | _null_                     | &#x091A; Ca                  |
|`U+091B`   | Letter           | CONSONANT         | _null_                     | &#x091B; Cha                 |
|`U+091C`   | Letter           | CONSONANT         | _null_                     | &#x091C; Ja                  |
|`U+091D`   | Letter           | CONSONANT         | _null_                     | &#x091D; Jha                 |
|`U+091E`   | Letter           | CONSONANT         | _null_                     | &#x091E; Nya                 |
|`U+091F`   | Letter           | CONSONANT         | _null_                     | &#x091F; Tta                 |
| | | | |
|`U+0920`   | Letter           | CONSONANT         | _null_                     | &#x0920; Ttha                |
|`U+0921`   | Letter           | CONSONANT         | _null_                     | &#x0921; Dda                 |
|`U+0922`   | Letter           | CONSONANT         | _null_                     | &#x0922; Ddha                |
|`U+0923`   | Letter           | CONSONANT         | _null_                     | &#x0923; Nna                 |
|`U+0924`   | Letter           | CONSONANT         | _null_                     | &#x0924; Ta                  |
|`U+0925`   | Letter           | CONSONANT         | _null_                     | &#x0925; Tha                 |
|`U+0926`   | Letter           | CONSONANT         | _null_                     | &#x0926; Da                  |
|`U+0927`   | Letter           | CONSONANT         | _null_                     | &#x0927; Dha                 |
|`U+0928`   | Letter           | CONSONANT         | _null_                     | &#x0928; Na                  |
|`U+0929`   | Letter           | CONSONANT         | _null_                     | &#x0929; Nnna                |
|`U+092A`   | Letter           | CONSONANT         | _null_                     | &#x092A; Pa                  |
|`U+092B`   | Letter           | CONSONANT         | _null_                     | &#x092B; Pha                 |
|`U+092C`   | Letter           | CONSONANT         | _null_                     | &#x092C; Ba                  |
|`U+092D`   | Letter           | CONSONANT         | _null_                     | &#x092D; Bha                 |
|`U+092E`   | Letter           | CONSONANT         | _null_                     | &#x092E; Ma                  |
|`U+092F`   | Letter           | CONSONANT         | _null_                     | &#x092F; Ya                  |
| | | | |
|`U+0930`   | Letter           | CONSONANT         | _null_                     | &#x0930; Ra                  |
|`U+0931`   | Letter           | CONSONANT         | _null_                     | &#x0931; Rra                 |
|`U+0932`   | Letter           | CONSONANT         | _null_                     | &#x0932; La                  |
|`U+0933`   | Letter           | CONSONANT         | _null_                     | &#x0933; Lla                 |
|`U+0934`   | Letter           | CONSONANT         | _null_                     | &#x0934; Llla                |
|`U+0935`   | Letter           | CONSONANT         | _null_                     | &#x0935; Va                  |
|`U+0936`   | Letter           | CONSONANT         | _null_                     | &#x0936; Sha                 |
|`U+0937`   | Letter           | CONSONANT         | _null_                     | &#x0937; Ssa                 |
|`U+0938`   | Letter           | CONSONANT         | _null_                     | &#x0938; Sa                  |
|`U+0939`   | Letter           | CONSONANT         | _null_                     | &#x0939; Ha                  |
|`U+093A`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x093A; Sign Oe             |
|`U+093B`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x093B; Sign Ooe            |
|`U+093C`   | Mark [Mn]        | NUKTA             | BOTTOM_POSITION            | &#x093C; Nukta               |
|`U+093D`   | Letter           | AVAGRAHA          | _null_                     | &#x093D; Avagraha            |
|`U+093E`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x093E; Sign Aa             |
|`U+093F`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x093F; Sign I              |
| | | | |
|`U+0940`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0940; Sign Ii             |
|`U+0941`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0941; Sign U              |
|`U+0942`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0942; Sign Uu             |
|`U+0943`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0943; Sign Vocalic R      |
|`U+0944`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0944; Sign Vocalic Rr     |
|`U+0945`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0945; Sign Candra E       |
|`U+0946`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0946; Sign Short E        |
|`U+0947`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0947; Sign E              |
|`U+0948`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0948; Sign Ai             |
|`U+0949`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0949; Sign Candra O       |
|`U+094A`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x094A; Sign Short O        |
|`U+094B`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x094B; Sign O              |
|`U+094C`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x094C; Sign Au             |
|`U+094D`   | Mark [Mn]        | VIRAMA            | BOTTOM_POSITION            | &#x094D; Virama              |
|`U+094E`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x094E; Sign Prishthamatra E|
|`U+094F`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x094F; Sign Aw             |
| | | | |
|`U+0950`   | Letter           | _null_            | _null_                     | &#x0950; Om                  |
|`U+0951`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x0951; Udatta              |
|`U+0952`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x0952; Anudatta            |
|`U+0953`   | Mark [Mn]        | SYLLABLE_MODIFIER | TOP_POSITION               | &#x0953; Grave accent        |
|`U+0954`   | Mark [Mn]        | SYLLABLE_MODIFIER | TOP_POSITION               | &#x0954; Acute accent        |
|`U+0955`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0955; Sign Candra Long E  |
|`U+0956`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0956; Sign Ue             |
|`U+0957`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0957; Sign Uue            |
|`U+0958`   | Letter           | CONSONANT         | _null_                     | &#x0958; Qa                  |
|`U+0959`   | Letter           | CONSONANT         | _null_                     | &#x0959; Khha                |
|`U+095A`   | Letter           | CONSONANT         | _null_                     | &#x095A; Ghha                |
|`U+095B`   | Letter           | CONSONANT         | _null_                     | &#x095B; Za                  |
|`U+095C`   | Letter           | CONSONANT         | _null_                     | &#x095C; Dddha               |
|`U+095D`   | Letter           | CONSONANT         | _null_                     | &#x095D; Rha                 |
|`U+095E`   | Letter           | CONSONANT         | _null_                     | &#x095E; Fa                  |
|`U+095F`   | Letter           | CONSONANT         | _null_                     | &#x095F; Yya                 |
| | | | |
|`U+0960`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0960; Vocalic Rr          |
|`U+0961`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0961; Vocalic Ll          |
|`U+0962`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0962; Sign Vocalic L      |
|`U+0963`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0963; Sign Vocalic Ll     |
|`U+0964`   | Punctuation      | _null_            | _null_                     | &#x0964; Danda               |
|`U+0965`   | Punctuation      | _null_            | _null_                     | &#x0965; Double Danda        |
|`U+0966`   | Number           | NUMBER            | _null_                     | &#x0966; Digit Zero          |
|`U+0967`   | Number           | NUMBER            | _null_                     | &#x0967; Digit One           |
|`U+0968`   | Number           | NUMBER            | _null_                     | &#x0968; Digit Two           |
|`U+0969`   | Number           | NUMBER            | _null_                     | &#x0969; Digit Three         |
|`U+096A`   | Number           | NUMBER            | _null_                     | &#x096A; Digit Four          |
|`U+096B`   | Number           | NUMBER            | _null_                     | &#x096B; Digit Five          |
|`U+096C`   | Number           | NUMBER            | _null_                     | &#x096C; Digit Six           |
|`U+096D`   | Number           | NUMBER            | _null_                     | &#x096D; Digit Seven         |
|`U+096E`   | Number           | NUMBER            | _null_                     | &#x096E; Digit Eight         |
|`U+096F`   | Number           | NUMBER            | _null_                     | &#x096F; Digit Nine          |
| | | | |
|`U+0970`   | Punctuation      | _null_            | _null_                     | &#x0970; Abbreviation Sign   |
|`U+0971`   | Letter           | _null_            | _null_                     | &#x0971; Sign High Spacing Dot|
|`U+0972`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0972; Candra A            |
|`U+0973`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0973; Oe                  |
|`U+0974`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0974; Ooe                 |
|`U+0975`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0975; Aw                  |
|`U+0976`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0976; Ue                  |
|`U+0977`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0977; Uue                 |
|`U+0978`   | Letter           | CONSONANT         | _null_                     | &#x0978; Marwari Dda         |
|`U+0979`   | Letter           | CONSONANT         | _null_                     | &#x0979; Zha                 |
|`U+097A`   | Letter           | CONSONANT         | _null_                     | &#x097A; Heavy Ya            |
|`U+097B`   | Letter           | CONSONANT         | _null_                     | &#x097B; Gga                 |
|`U+097C`   | Letter           | CONSONANT         | _null_                     | &#x097C; Jja                 |
|`U+097D`   | Letter           | CONSONANT         | _null_                     | &#x097D; Glottal Stop        |
|`U+097E`   | Letter           | CONSONANT         | _null_                     | &#x097E; Ddda                |
|`U+097F`   | Letter           | CONSONANT         | _null_                     | &#x097F; Bba                 |
:::


## Devanagari Extended character table ##

> Note: the cantillation marks of the "combining consonant" variety in
> the Devanagari Extended block are _not_ considered consonants for
> shaping purposes (including syllable identification, the
> determination of the base consonant, or positioning "Reph").


:::{table} Devanagari Extended character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+A8E0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8E0; Combining Zero      |
|`U+A8E1`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8E1; Combining One       |
|`U+A8E2`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8E2; Combining Two       |
|`U+A8E3`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8E3; Combining Three     |
|`U+A8E4`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8E4; Combining Four      |
|`U+A8E5`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8E5; Combining Five      |
|`U+A8E6`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8E6; Combining Six       |
|`U+A8E7`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8E7; Combining Seven     |
|`U+A8E8`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8E8; Combining Eight     |
|`U+A8E9`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8E9; Combining Nine      |
|`U+A8EA`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8EA; Combining A         |
|`U+A8EB`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8EB; Combining U         |
|`U+A8EC`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8EC; Combining Ka        |
|`U+A8ED`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8ED; Combining Na        |
|`U+A8EE`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8EE; Combining Pa        |
|`U+A8EF`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8EF; Combining Ra        |
| | | | |
|`U+A8F0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8F0; Combining Vi        |
|`U+A8F1`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#xA8F1; Combining Avagraha  |
|`U+A8F2`   | Letter           | SYMBOL            | _null_                     | &#xA8F2; Spacing Candrabindu |
|`U+A8F3`   | Letter           | BINDU             | _null_                     | &#xA8F3; Candrabindu Virama  |
|`U+A8F4`   | Letter           | _null_            | _null_                     | &#xA8F4; Double Candrabindu Virama|
|`U+A8F5`   | Letter           | _null_            | _null_                     | &#xA8F5; Candrabindu Two     |
|`U+A8F6`   | Letter           | _null_            | _null_                     | &#xA8F6; Candrabindu Three   |
|`U+A8F7`   | Letter           | SYMBOL            | _null_                     | &#xA8F7; Candrabindu Avagraha|
|`U+A8F8`   | Punctuation      | _null_            | _null_                     | &#xA8F8; Pushpika            |
|`U+A8F9`   | Punctuation      | _null_            | _null_                     | &#xA8F9; Gap Filler          |
|`U+A8FA`   | Punctuation      | _null_            | _null_                     | &#xA8FA; Caret               |
|`U+A8FB`   | Letter           | _null_            | _null_                     | &#xA8FB; Headstroke          |
|`U+A8FC`   | Punctuation      | _null_            | _null_                     | &#xA8FC; Siddham             |
|`U+A8FD`   | Letter           | _null_            | _null_                     | &#xA8FD; Jain Om             |
|`U+A8FE`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#xA8FE; Ay                  |
|`U+A8FF`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#xA8FF; Sign Ay             |
| | | | |
:::


## Devanagari Extended-A character table ##


:::{table} Devanagari Extended-A character table

| Codepoint | Unicode category | Shaping class | Mark-placement subclass | Glyph                                   |
|:----------|:-----------------|:--------------|:------------------------|:----------------------------------------|
| `U+11B00` | Punctuation      | _null_        | _null_                  | &#x11B00; Head Mark                     |
| `U+11B01` | Punctuation      | _null_        | _null_                  | &#x11B01; Head Mark With Headstroke     |
| `U+11B02` | Punctuation      | _null_        | _null_                  | &#x11B02; Sign Bhale                    |
| `U+11B03` | Punctuation      | _null_        | _null_                  | &#x11B03; Sign Bhale With Hook          |
| `U+11B04` | Punctuation      | _null_        | _null_                  | &#x11B04; Sign Extended Bhale           |
| `U+11B05` | Punctuation      | _null_        | _null_                  | &#x11B05; Sign Extended Bhale With Hook |
| `U+11B06` | Punctuation      | _null_        | _null_                  | &#x11B06; Sign Western Five-like Bhale  |
| `U+11B07` | Punctuation      | _null_        | _null_                  | &#x11B07; Sign Western Nine-like Bhale  |
| `U+11B08` | Punctuation      | _null_        | _null_                  | &#x11B08; Sign Reversed Nine-like Bhale |
| `U+11B09` | Punctuation      | _null_        | _null_                  | &#x11B09; Sign Mindu                    |
| `U+11B0A` | _unassigned_     |               |                         |                                         |
| `U+11B0B` | _unassigned_     |               |                         |                                         |
| `U+11B0C` | _unassigned_     |               |                         |                                         |
| `U+11B0D` | _unassigned_     |               |                         |                                         |
| `U+11B0E` | _unassigned_     |               |                         |                                         |
| `U+11B0F` | _unassigned_     |               |                         |                                         |
|           |                  |               |                         |                                         |
| `U+11B10` | _unassigned_     |               |                         |                                         |
| `U+11B11` | _unassigned_     |               |                         |                                         |
| `U+11B12` | _unassigned_     |               |                         |                                         |
| `U+11B13` | _unassigned_     |               |                         |                                         |
| `U+11B14` | _unassigned_     |               |                         |                                         |
| `U+11B15` | _unassigned_     |               |                         |                                         |
| `U+11B16` | _unassigned_     |               |                         |                                         |
| `U+11B17` | _unassigned_     |               |                         |                                         |
| `U+11B18` | _unassigned_     |               |                         |                                         |
| `U+11B19` | _unassigned_     |               |                         |                                         |
| `U+11B1A` | _unassigned_     |               |                         |                                         |
| `U+11B1B` | _unassigned_     |               |                         |                                         |
| `U+11B1C` | _unassigned_     |               |                         |                                         |
| `U+11B1D` | _unassigned_     |               |                         |                                         |
| `U+11B1E` | _unassigned_     |               |                         |                                         |
| `U+11B1F` | _unassigned_     |               |                         |                                         |
|           |                  |               |                         |                                         |
| `U+11B20` | _unassigned_     |               |                         |                                         |
| `U+11B21` | _unassigned_     |               |                         |                                         |
| `U+11B22` | _unassigned_     |               |                         |                                         |
| `U+11B23` | _unassigned_     |               |                         |                                         |
| `U+11B24` | _unassigned_     |               |                         |                                         |
| `U+11B25` | _unassigned_     |               |                         |                                         |
| `U+11B26` | _unassigned_     |               |                         |                                         |
| `U+11B27` | _unassigned_     |               |                         |                                         |
| `U+11B28` | _unassigned_     |               |                         |                                         |
| `U+11B29` | _unassigned_     |               |                         |                                         |
| `U+11B2A` | _unassigned_     |               |                         |                                         |
| `U+11B2B` | _unassigned_     |               |                         |                                         |
| `U+11B2C` | _unassigned_     |               |                         |                                         |
| `U+11B2D` | _unassigned_     |               |                         |                                         |
| `U+11B2E` | _unassigned_     |               |                         |                                         |
| `U+11B2F` | _unassigned_     |               |                         |                                         |
|           |                  |               |                         |                                         |
| `U+11B30` | _unassigned_     |               |                         |                                         |
| `U+11B31` | _unassigned_     |               |                         |                                         |
| `U+11B32` | _unassigned_     |               |                         |                                         |
| `U+11B33` | _unassigned_     |               |                         |                                         |
| `U+11B34` | _unassigned_     |               |                         |                                         |
| `U+11B35` | _unassigned_     |               |                         |                                         |
| `U+11B36` | _unassigned_     |               |                         |                                         |
| `U+11B37` | _unassigned_     |               |                         |                                         |
| `U+11B38` | _unassigned_     |               |                         |                                         |
| `U+11B39` | _unassigned_     |               |                         |                                         |
| `U+11B3A` | _unassigned_     |               |                         |                                         |
| `U+11B3B` | _unassigned_     |               |                         |                                         |
| `U+11B3C` | _unassigned_     |               |                         |                                         |
| `U+11B3D` | _unassigned_     |               |                         |                                         |
| `U+11B3E` | _unassigned_     |               |                         |                                         |
| `U+11B3F` | _unassigned_     |               |                         |                                         |
|           |                  |               |                         |                                         |
| `U+11B40` | _unassigned_     |               |                         |                                         |
| `U+11B41` | _unassigned_     |               |                         |                                         |
| `U+11B42` | _unassigned_     |               |                         |                                         |
| `U+11B43` | _unassigned_     |               |                         |                                         |
| `U+11B44` | _unassigned_     |               |                         |                                         |
| `U+11B45` | _unassigned_     |               |                         |                                         |
| `U+11B46` | _unassigned_     |               |                         |                                         |
| `U+11B47` | _unassigned_     |               |                         |                                         |
| `U+11B48` | _unassigned_     |               |                         |                                         |
| `U+11B49` | _unassigned_     |               |                         |                                         |
| `U+11B4A` | _unassigned_     |               |                         |                                         |
| `U+11B4B` | _unassigned_     |               |                         |                                         |
| `U+11B4C` | _unassigned_     |               |                         |                                         |
| `U+11B4D` | _unassigned_     |               |                         |                                         |
| `U+11B4E` | _unassigned_     |               |                         |                                         |
| `U+11B4F` | _unassigned_     |               |                         |                                         |
|           |                  |               |                         |                                         |
| `U+11B50` | _unassigned_     |               |                         |                                         |
| `U+11B51` | _unassigned_     |               |                         |                                         |
| `U+11B52` | _unassigned_     |               |                         |                                         |
| `U+11B53` | _unassigned_     |               |                         |                                         |
| `U+11B54` | _unassigned_     |               |                         |                                         |
| `U+11B55` | _unassigned_     |               |                         |                                         |
| `U+11B56` | _unassigned_     |               |                         |                                         |
| `U+11B57` | _unassigned_     |               |                         |                                         |
| `U+11B58` | _unassigned_     |               |                         |                                         |
| `U+11B59` | _unassigned_     |               |                         |                                         |
| `U+11B5A` | _unassigned_     |               |                         |                                         |
| `U+11B5B` | _unassigned_     |               |                         |                                         |
| `U+11B5C` | _unassigned_     |               |                         |                                         |
| `U+11B5D` | _unassigned_     |               |                         |                                         |
| `U+11B5E` | _unassigned_     |               |                         |                                         |
| `U+11B5F` | _unassigned_     |               |                         |                                         |
|           |                  |               |                         |                                         |
:::


## Vedic Extensions character table ##

Sanskrit runs written in the Devanagari script may also include
characters from the Vedic Extensions block. These characters should be
classified as follows.

> Note: See the [Vedic Extensions](../opentype-shaping-vedic-extensions.md) 
> document for additional information.


:::{table} Vedic Extensions character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1CD0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD0; Tone Karshana       |
|`U+1CD1`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD1; Tone Shara          |
|`U+1CD2`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD2; Tone Prenkha        |
|`U+1CD3`   | Punctuation      | _null_            | _null_                     | &#x1CD3; Sign Nihshvasa      |
|`U+1CD4`   | Mark [Mn]        | CANTILLATION      | OVERSTRUCK                 | &#x1CD4; Tone Midline Svarita |
|`U+1CD5`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD5; Tone Aggravated Independent Svarita |
|`U+1CD6`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD6; Tone Independent Svarita |
|`U+1CD7`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD7; Tone Kathaka Independent Svarita |
|`U+1CD8`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD8; Tone Candra Below   |
|`U+1CD9`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD9; Tone Kathaka Independent Svarita Schroeder |
|`U+1CDA`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDA; Tone Double Svarita |
|`U+1CDB`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDB; Tone Triple Svarita |
|`U+1CDC`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDC; Tone Kathaka Anudatta |
|`U+1CDD`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDD; Tone Dot Below      |
|`U+1CDE`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDE; Tone Two Dots Below |
|`U+1CDF`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDF; Tone Three Dots Below |
| | | | |																		
|`U+1CE0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CE0; Tone Rigvedic Kashmiri Independent Svarita |
|`U+1CE1`   | Mark [Mc]        | CANTILLATION      | RIGHT_POSITION             | &#x1CE1; Tone Atharvavedic Independent Svarita |
|`U+1CE2`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE2; Sign Visarga Svarita |
|`U+1CE3`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE3; Sign Visarga Udatta |
|`U+1CE4`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE4; Sign Reversed Visarga Udatta |
|`U+1CE5`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE5; Sign Visarga Anudatta |
|`U+1CE6`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE6; Sign Reversed Visarga Anudatta |
|`U+1CE7`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE7; Sign Visarga Udatta With Tail |
|`U+1CE8`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE8; Sign Visarga Anudatta With Tail |
|`U+1CE9`   | Letter           | SYMBOL            | _null_                     | &#x1CE9; Sign Anusvara Antargomukha |
|`U+1CEA`   | Letter           | _null_            | _null_                     | &#x1CEA; Sign Anusvara Bahirgomukha |
|`U+1CEB`   | Letter           | _null_            | _null_                     | &#x1CEB; Sign Anusvara Vamagomukha |
|`U+1CEC`   | Letter           | SYMBOL            | _null_                     | &#x1CEC; Sign Anusvara Vamagomukha With Tail |
|`U+1CED`   | Mark [Mn]        | AVAGRAHA          | BOTTOM_POSITION            | &#x1CED; Sign Tiryak         |
|`U+1CEE`   | Letter           | SYMBOL            | _null_                     | &#x1CEE; Sign Hexiform Long Anusvara |
|`U+1CEF`   | Letter           | _null_            | _null_                     | &#x1CEF; Sign Long Anusvara  |
| | | | |																		
|`U+1CF0`   | Letter           | _null_            | _null_                     | &#x1CF0; Sign Rthang Long Anusvara |
|`U+1CF1`   | Letter           | SYMBOL            | _null_                     | &#x1CF1; Sign Anusvara Ubhayato Mukha |
|`U+1CF2`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF2; Sign Ardhavisarga   |
|`U+1CF3`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF3; Sign Rotated Ardhavisarga |
|`U+1CF4`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CF4; Tone Candra Above   |
|`U+1CF5`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF5; Sign Jihvamuliya    |
|`U+1CF6`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF6; Sign Upadhmaniya    |
|`U+1CF7`   | Mark [Mc]        | _null_            | _null_                     | &#x1CF7; Sign Atikrama       |
|`U+1CF8`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF8; Tone Ring Above     |
|`U+1CF9`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF9; Tone Double Ring Above |
|`U+1CFA`   | Letter           | PLACEHOLDER       | _null_                     | &#x1CFA; Sign Double Anusvara Antargomukha |
|`U+1CFB`   | _unassigned_     |                   |                            |                              |
|`U+1CFC`   | _unassigned_     |                   |                            |                              |
|`U+1CFD`   | _unassigned_     |                   |                            |                              |
|`U+1CFE`   | _unassigned_     |                   |                            |                              |
|`U+1CFF`   | _unassigned_     |                   |                            |                              |
:::


## Miscellaneous character table ##

Other important characters that may be encountered when shaping runs
of Devanagari text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2013; En dash               |
|`U+2014`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
:::


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a "_Consonant_,Halant,_Consonant_" sequence. The
sequence "_Consonant_,Halant,ZWJ,_Consonant_" blocks the formation of
a conjunct between the two consonants. 

Note, however, that the "_Consonant_,Halant" subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead. The
sequence "_Consonant_,Halant,ZWNJ,_Consonant_" should produce the
first consonant in its standard form, followed by an explicit
"Halant".

A secondary usage of the zero-width joiner is to prevent the formation of
"Reph". An initial "Ra,Halant,ZWJ" sequence should not produce a "Reph",
where an initial "Ra,Halant" sequence without the zero-width joiner
otherwise would.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match "NBSP,ZWJ,Halant,_Consonant_", "NBSP,_mark_", or "NBSP,_matra_".


================================================
FILE: character-tables/character-tables-gujarati.md
================================================
# Gujarati character tables #

This document lists the per-character shaping information needed to
[shape Gujarati text](../opentype-shaping-gujarati.md).

**Contents**

  - [Gujarati character table](#gujarati-character-table)
  - [Vedic Extensions character table](#vedic-extensions-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)
	  

## Gujarati character table ##

Gujarati glyphs should be classified as in the following
table. Codepoints in the Gujarati block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as currency marks,
punctuation, and other symbols.

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.

:::{table} Gujarati character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0A80`   | _unassigned_     |                   |                            |                              |
|`U+0A81`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0A81; Candrabindu         |
|`U+0A82`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0A82; Anusvara            |
|`U+0A83`   | Mark [Mc]        | VISARGA           | RIGHT_POSITION             | &#x0A83; Visarga             |
|`U+0A84`   | _unassigned_     |                   |                            |                              |
|`U+0A85`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A85; A                   |
|`U+0A86`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A86; Aa                  |
|`U+0A87`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A87; I                   |
|`U+0A88`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A88; Ii                  |
|`U+0A89`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A89; U                   |
|`U+0A8A`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A8A; Uu                  |
|`U+0A8B`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A8B; Vocalic R           |
|`U+0A8C`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A8C; Vocalic L           |
|`U+0A8D`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A8D; Candra E            |
|`U+0A8E`   | _unassigned_     |                   |                            |                              |
|`U+0A8F`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A8F; E                   |
| | | | |																	   
|`U+0A90`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A90; Ai                  |
|`U+0A91`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A91; Candra O            |
|`U+0A92`   | _unassigned_     |                   |                            |                              |
|`U+0A93`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A93; O                   |
|`U+0A94`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A94; Au                  |
|`U+0A95`   | Letter           | CONSONANT         | _null_                     | &#x0A95; Ka                  |
|`U+0A96`   | Letter           | CONSONANT         | _null_                     | &#x0A96; Kha                 |
|`U+0A97`   | Letter           | CONSONANT         | _null_                     | &#x0A97; Ga                  |
|`U+0A98`   | Letter           | CONSONANT         | _null_                     | &#x0A98; Gha                 |
|`U+0A99`   | Letter           | CONSONANT         | _null_                     | &#x0A99; Nga                 |
|`U+0A9A`   | Letter           | CONSONANT         | _null_                     | &#x0A9A; Ca                  |
|`U+0A9B`   | Letter           | CONSONANT         | _null_                     | &#x0A9B; Cha                 |
|`U+0A9C`   | Letter           | CONSONANT         | _null_                     | &#x0A9C; Ja                  |
|`U+0A9D`   | Letter           | CONSONANT         | _null_                     | &#x0A9D; Jha                 |
|`U+0A9E`   | Letter           | CONSONANT         | _null_                     | &#x0A9E; Nya                 |
|`U+0A9F`   | Letter           | CONSONANT         | _null_                     | &#x0A9F; Tta                 |
| | | | |																	   
|`U+0AA0`   | Letter           | CONSONANT         | _null_                     | &#x0AA0; Ttha                |
|`U+0AA1`   | Letter           | CONSONANT         | _null_                     | &#x0AA1; Dda                 |
|`U+0AA2`   | Letter           | CONSONANT         | _null_                     | &#x0AA2; Ddha                |
|`U+0AA3`   | Letter           | CONSONANT         | _null_                     | &#x0AA3; Nna                 |
|`U+0AA4`   | Letter           | CONSONANT         | _null_                     | &#x0AA4; Ta                  |
|`U+0AA5`   | Letter           | CONSONANT         | _null_                     | &#x0AA5; Tha                 |
|`U+0AA6`   | Letter           | CONSONANT         | _null_                     | &#x0AA6; Da                  |
|`U+0AA7`   | Letter           | CONSONANT         | _null_                     | &#x0AA7; Dha                 |
|`U+0AA8`   | Letter           | CONSONANT         | _null_                     | &#x0AA8; Na                  |
|`U+0AA9`   | _unassigned_     |                   |                            |                              |
|`U+0AAA`   | Letter           | CONSONANT         | _null_                     | &#x0AAA; Pa                  |
|`U+0AAB`   | Letter           | CONSONANT         | _null_                     | &#x0AAB; Pha                 |
|`U+0AAC`   | Letter           | CONSONANT         | _null_                     | &#x0AAC; Ba                  |
|`U+0AAD`   | Letter           | CONSONANT         | _null_                     | &#x0AAD; Bha                 |
|`U+0AAE`   | Letter           | CONSONANT         | _null_                     | &#x0AAE; Ma                  |
|`U+0AAF`   | Letter           | CONSONANT         | _null_                     | &#x0AAF; Ya                  |
| | | | |																	    
|`U+0AB0`   | Letter           | CONSONANT         | _null_                     | &#x0AB0; Ra                  |
|`U+0AB1`   | _unassigned_     |                   |                            |                              |
|`U+0AB2`   | Letter           | CONSONANT         | _null_                     | &#x0AB2; La                  |
|`U+0AB3`   | Letter           | CONSONANT         | _null_                     | &#x0AB3; Lla                 |
|`U+0AB4`   | _unassigned_     |                   |                            |                              |
|`U+0AB5`   | Letter           | CONSONANT         | _null_                     | &#x0AB5; Va                  |
|`U+0AB6`   | Letter           | CONSONANT         | _null_                     | &#x0AB6; Sha                 |
|`U+0AB7`   | Letter           | CONSONANT         | _null_                     | &#x0AB7; Ssa                 |
|`U+0AB8`   | Letter           | CONSONANT         | _null_                     | &#x0AB8; Sa                  |
|`U+0AB9`   | Letter           | CONSONANT         | _null_                     | &#x0AB9; Ha                  |
|`U+0ABA`   | _unassigned_     |                   |                            |                              |
|`U+0ABB`   | _unassigned_     |                   |                            |                              |
|`U+0ABC`   | Mark [Mn]        | NUKTA             | BOTTOM_POSITION            | &#x0ABC; Nukta               |
|`U+0ABD`   | Letter           | AVAGRAHA          | _null_                     | &#x0ABD; Avagraha            |
|`U+0ABE`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0ABE; Sign Aa             |
|`U+0ABF`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x0ABF; Sign I              |
| | | | |																	   
|`U+0AC0`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0AC0; Sign Ii             |
|`U+0AC1`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0AC1; Sign U              |
|`U+0AC2`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0AC2; Sign Uu             |
|`U+0AC3`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0AC3; Sign Vocalic R      |
|`U+0AC4`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0AC4; Sign Vocalic Rr     |
|`U+0AC5`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0AC5; Sign Candra E       |
|`U+0AC6`   | _unassigned_     |                   |                            |                              |
|`U+0AC7`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0AC7; Sign E              |
|`U+0AC8`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0AC8; Sign Ai             |
|`U+0AC9`   | Mark [Mc]        | VOWEL_DEPENDENT   | TOP_AND_RIGHT_POSITION     | &#x0AC9; Sign Candra O       |
|`U+0ACA`   | _unassigned_     |                   |                            |                              |
|`U+0ACB`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0ACB; Sign O              |
|`U+0ACC`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0ACC; Sign Au             |
|`U+0ACD`   | Mark [Mn]        | VIRAMA            | BOTTOM_POSITION            | &#x0ACD; Virama              |
|`U+0ACE`   | _unassigned_     |                   |                            |                              |
|`U+0ACF`   | _unassigned_     |                   |                            |                              |
| | | | |																	   
|`U+0AD0`   | Letter           | _null_            | _null_                     | &#x0AD0; Om                  |
|`U+0AD1`   | _unassigned_     |                   |                            |                              |
|`U+0AD2`   | _unassigned_     |                   |                            |                              |
|`U+0AD3`   | _unassigned_     |                   |                            |                              |
|`U+0AD4`   | _unassigned_     |                   |                            |                              |
|`U+0AD5`   | _unassigned_     |                   |                            |                              |
|`U+0AD6`   | _unassigned_     |                   |                            |                              |
|`U+0AD7`   | _unassigned_     |                   |                            |                              |
|`U+0AD8`   | _unassigned_     |                   |                            |                              |
|`U+0AD9`   | _unassigned_     |                   |                            |                              |
|`U+0ADA`   | _unassigned_     |                   |                            |                              |
|`U+0ADB`   | _unassigned_     |                   |                            |                              |
|`U+0ADC`   | _unassigned_     |                   |                            |                              |
|`U+0ADD`   | _unassigned_     |                   |                            |                              |
|`U+0ADE`   | _unassigned_     |                   |                            |                              |
|`U+0ADF`   | _unassigned_     |                   |                            |                              |
| | | | |																	   
|`U+0AE0`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0AE0; Vocalic Rr          |
|`U+0AE1`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0AE1; Vocalic Ll          |
|`U+0AE2`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0AE2; Sign Vocalic L      |
|`U+0AE3`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0AE3; Sign Vocalic Ll     |
|`U+0AE4`   | _unassigned_     |                   |                            |                              |
|`U+0AE5`   | _unassigned_     |                   |                            |                              |
|`U+0AE6`   | Number           | NUMBER            | _null_                     | &#x0AE6; Digit Zero          |
|`U+0AE7`   | Number           | NUMBER            | _null_                     | &#x0AE7; Digit One           |
|`U+0AE8`   | Number           | NUMBER            | _null_                     | &#x0AE8; Digit Two           |
|`U+0AE9`   | Number           | NUMBER            | _null_                     | &#x0AE9; Digit Three         |
|`U+0AEA`   | Number           | NUMBER            | _null_                     | &#x0AEA; Digit Four          |
|`U+0AEB`   | Number           | NUMBER            | _null_                     | &#x0AEB; Digit Five          |
|`U+0AEC`   | Number           | NUMBER            | _null_                     | &#x0AEC; Digit Six           |
|`U+0AED`   | Number           | NUMBER            | _null_                     | &#x0AED; Digit Seven         |
|`U+0AEE`   | Number           | NUMBER            | _null_                     | &#x0AEE; Digit Eight         |
|`U+0AEF`   | Number           | NUMBER            | _null_                     | &#x0AEF; Digit Nine          |
| | | | |
|`U+0AF0`   | Punctuation      | SYMBOL            | _null_                     | &#x0AF0; Abbreviation        |
|`U+0AF1`   | Symbol           | SYMBOL            | _null_                     | &#x0AF1; Rupee Sign          |
|`U+0AF2`   | _unassigned_     |                   |                            |                              |
|`U+0AF3`   | _unassigned_     |                   |                            |                              |
|`U+0AF4`   | _unassigned_     |                   |                            |                              |
|`U+0AF5`   | _unassigned_     |                   |                            |                              |
|`U+0AF6`   | _unassigned_     |                   |                            |                              |
|`U+0AF7`   | _unassigned_     |                   |                            |                              |
|`U+0AF8`   | _unassigned_     |                   |                            |                              |
|`U+0AF9`   | Letter           | CONSONANT         | _null_                     | &#x0AF9; Zha                 |
|`U+0AFA`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x0AFA; Sukun               |
|`U+0AFB`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x0AFB; Shadda              |
|`U+0AFC`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x0AFC; Maddah              |
|`U+0AFD`   | Mark [Mn]        | NUKTA             | TOP_POSITION               | &#x0AFD; Three-Dot Nukta Above|
|`U+0AFE`   | Mark [Mn]        | NUKTA             | TOP_POSITION               | &#x0AFE; Circle Nukta Above  |
|`U+0AFF`   | Mark [Mn]        | NUKTA             | TOP_POSITION               | &#x0AFF; Two-Circle Nukta Above|
:::


## Vedic Extensions character table ##

Sanskrit runs written in the Gujarati script may also include
characters from the Vedic Extensions block. These characters should be
classified as follows.

> Note: See the [Vedic Extensions](../opentype-shaping-vedic-extensions.md) 
> document for additional information.


:::{table} Vedic Extensions character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1CD0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD0; Tone Karshana       |
|`U+1CD1`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD1; Tone Shara          |
|`U+1CD2`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD2; Tone Prenkha        |
|`U+1CD3`   | Punctuation      | _null_            | _null_                     | &#x1CD3; Sign Nihshvasa      |
|`U+1CD4`   | Mark [Mn]        | CANTILLATION      | OVERSTRUCK                 | &#x1CD4; Tone Midline Svarita |
|`U+1CD5`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD5; Tone Aggravated Independent Svarita |
|`U+1CD6`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD6; Tone Independent Svarita |
|`U+1CD7`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD7; Tone Kathaka Independent Svarita |
|`U+1CD8`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD8; Tone Candra Below   |
|`U+1CD9`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD9; Tone Kathaka Independent Svarita Schroeder |
|`U+1CDA`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDA; Tone Double Svarita |
|`U+1CDB`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDB; Tone Triple Svarita |
|`U+1CDC`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDC; Tone Kathaka Anudatta |
|`U+1CDD`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDD; Tone Dot Below      |
|`U+1CDE`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDE; Tone Two Dots Below |
|`U+1CDF`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDF; Tone Three Dots Below |
| | | | |																		
|`U+1CE0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CE0; Tone Rigvedic Kashmiri Independent Svarita |
|`U+1CE1`   | Mark [Mc]        | CANTILLATION      | RIGHT_POSITION             | &#x1CE1; Tone Atharvavedic Independent Svarita |
|`U+1CE2`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE2; Sign Visarga Svarita |
|`U+1CE3`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE3; Sign Visarga Udatta |
|`U+1CE4`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE4; Sign Reversed Visarga Udatta |
|`U+1CE5`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE5; Sign Visarga Anudatta |
|`U+1CE6`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE6; Sign Reversed Visarga Anudatta |
|`U+1CE7`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE7; Sign Visarga Udatta With Tail |
|`U+1CE8`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE8; Sign Visarga Anudatta With Tail |
|`U+1CE9`   | Letter           | SYMBOL            | _null_                     | &#x1CE9; Sign Anusvara Antargomukha |
|`U+1CEA`   | Letter           | _null_            | _null_                     | &#x1CEA; Sign Anusvara Bahirgomukha |
|`U+1CEB`   | Letter           | _null_            | _null_                     | &#x1CEB; Sign Anusvara Vamagomukha |
|`U+1CEC`   | Letter           | SYMBOL            | _null_                     | &#x1CEC; Sign Anusvara Vamagomukha With Tail |
|`U+1CED`   | Mark [Mn]        | AVAGRAHA          | BOTTOM_POSITION            | &#x1CED; Sign Tiryak         |
|`U+1CEE`   | Letter           | SYMBOL            | _null_                     | &#x1CEE; Sign Hexiform Long Anusvara |
|`U+1CEF`   | Letter           | _null_            | _null_                     | &#x1CEF; Sign Long Anusvara  |
| | | | |																		
|`U+1CF0`   | Letter           | _null_            | _null_                     | &#x1CF0; Sign Rthang Long Anusvara |
|`U+1CF1`   | Letter           | SYMBOL            | _null_                     | &#x1CF1; Sign Anusvara Ubhayato Mukha |
|`U+1CF2`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF2; Sign Ardhavisarga   |
|`U+1CF3`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF3; Sign Rotated Ardhavisarga |
|`U+1CF4`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CF4; Tone Candra Above   |
|`U+1CF5`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF5; Sign Jihvamuliya    |
|`U+1CF6`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF6; Sign Upadhmaniya    |
|`U+1CF7`   | Mark [Mc]        | _null_            | _null_                     | &#x1CF7; Sign Atikrama       |
|`U+1CF8`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF8; Tone Ring Above     |
|`U+1CF9`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF9; Tone Double Ring Above |
|`U+1CFA`   | Letter           | PLACEHOLDER       | _null_                     | &#x1CFA; Sign Double Anusvara Antargomukha |
|`U+1CFB`   | _unassigned_     |                   |                            |                              |
|`U+1CFC`   | _unassigned_     |                   |                            |                              |
|`U+1CFD`   | _unassigned_     |                   |                            |                              |
|`U+1CFE`   | _unassigned_     |                   |                            |                              |
|`U+1CFF`   | _unassigned_     |                   |                            |                              |
:::


## Miscellaneous character table ##

In addition to general punctuation, runs of Gujarati text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block. Gujarati text can also incorporate the udatta
(`U+0951`) and anudatta (`U+0952`) signs from the Devanagari block.


:::{table} Additional punctuation character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+0951`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x0951; Udatta              |
|`U+0952`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x0952; Anudatta            |
|`U+0964`   | Punctuation      | _null_            | _null_                     | &#x0964; Danda               |
|`U+0965`   | Punctuation      | _null_            | _null_                     | &#x0965; Double Danda        |
:::


Other important characters that may be encountered when shaping runs
of Gujarati text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2013; En dash               |
|`U+2014`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
:::

The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a "_Consonant_,Halant,_Consonant_" sequence. The
sequence "_Consonant_,Halant,ZWJ,_Consonant_" blocks the formation of
a conjunct between the two consonants. 

Note, however, that the "_Consonant_,Halant" subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead. The
sequence "_Consonant_,Halant,ZWNJ,_Consonant_" should produce the
first consonant in its standard form, followed by an explicit
"Halant".

A secondary usage of the zero-width joiner is to prevent the formation of
"Reph". An initial "Ra,Halant,ZWJ" sequence should not produce a "Reph",
where an initial "Ra,Halant" sequence without the zero-width joiner
otherwise would.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing — marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms —
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match "NBSP,ZWJ,Halant,_Consonant_", "NBSP,_mark_", or "NBSP,_matra_".


================================================
FILE: character-tables/character-tables-gurmukhi.md
================================================
# Gurmukhi character tables #

This document lists the per-character shaping information needed to
[shape Gurmukhi text](../opentype-shaping-gurmukhi.md).

**Contents**

  - [Gurmukhi character table](#gurmukhi-character-table)
  - [Vedic Extensions character table](#vedic-extensions-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)
	  

## Gurmukhi character table ##

Gurmukhi glyphs should be classified as in the following
table. Codepoints in the Gurmukhi block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as currency marks,
punctuation, and other symbols.

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


:::{table} Gurmukhi character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0A00`   | _unassigned_     |                   |                            |                              |
|`U+0A01`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0A01; Adak Bindi          |
|`U+0A02`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0A02; Bindi               |
|`U+0A03`   | Mark [Mc]        | VISARGA           | RIGHT_POSITION             | &#x0A03; Visarga             |
|`U+0A04`   | _unassigned_     |                   |                            |                              |
|`U+0A05`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A05; A                   |
|`U+0A06`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A06; Aa                  |
|`U+0A07`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A07; I                   |
|`U+0A08`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A08; Ii                  |
|`U+0A09`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A09; U                   |
|`U+0A0A`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A0A; Uu                  |
|`U+0A0B`   | _unassigned_     |                   |                            |                              |
|`U+0A0C`   | _unassigned_     |                   |                            |                              |
|`U+0A0D`   | _unassigned_     |                   |                            |                              |
|`U+0A0E`   | _unassigned_     |                   |                            |                              |
|`U+0A0F`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A0F; Ee                  |
| | | | |
|`U+0A10`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A10; Ai                  |
|`U+0A11`   | _unassigned_     |                   |                            |                              |
|`U+0A12`   | _unassigned_     |                   |                            |                              |
|`U+0A13`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A13; Oo                  |
|`U+0A14`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0A14; Au                  |
|`U+0A15`   | Letter           | CONSONANT         | _null_                     | &#x0A15; Ka                  |
|`U+0A16`   | Letter           | CONSONANT         | _null_                     | &#x0A16; Kha                 |
|`U+0A17`   | Letter           | CONSONANT         | _null_                     | &#x0A17; Ga                  |
|`U+0A18`   | Letter           | CONSONANT         | _null_                     | &#x0A18; Gha                 |
|`U+0A19`   | Letter           | CONSONANT         | _null_                     | &#x0A19; Nga                 |
|`U+0A1A`   | Letter           | CONSONANT         | _null_                     | &#x0A1A; Ca                  |
|`U+0A1B`   | Letter           | CONSONANT         | _null_                     | &#x0A1B; Cha                 |
|`U+0A1C`   | Letter           | CONSONANT         | _null_                     | &#x0A1C; Ja                  |
|`U+0A1D`   | Letter           | CONSONANT         | _null_                     | &#x0A1D; Jha                 |
|`U+0A1E`   | Letter           | CONSONANT         | _null_                     | &#x0A1E; Nya                 |
|`U+0A1F`   | Letter           | CONSONANT         | _null_                     | &#x0A1F; Tta                 |
| | | | |
|`U+0A20`   | Letter           | CONSONANT         | _null_                     | &#x0A20; Ttha                |
|`U+0A21`   | Letter           | CONSONANT         | _null_                     | &#x0A21; Dda                 |
|`U+0A22`   | Letter           | CONSONANT         | _null_                     | &#x0A22; Ddha                |
|`U+0A23`   | Letter           | CONSONANT         | _null_                     | &#x0A23; Nna                 |
|`U+0A24`   | Letter           | CONSONANT         | _null_                     | &#x0A24; Ta                  |
|`U+0A25`   | Letter           | CONSONANT         | _null_                     | &#x0A25; Tha                 |
|`U+0A26`   | Letter           | CONSONANT         | _null_                     | &#x0A26; Da                  |
|`U+0A27`   | Letter           | CONSONANT         | _null_                     | &#x0A27; Dha                 |
|`U+0A28`   | Letter           | CONSONANT         | _null_                     | &#x0A28; Na                  |
|`U+0A29`   | _unassigned_     |                   |                            |                              |
|`U+0A2A`   | Letter           | CONSONANT         | _null_                     | &#x0A2A; Pa                  |
|`U+0A2B`   | Letter           | CONSONANT         | _null_                     | &#x0A2B; Pha                 |
|`U+0A2C`   | Letter           | CONSONANT         | _null_                     | &#x0A2C; Ba                  |
|`U+0A2D`   | Letter           | CONSONANT         | _null_                     | &#x0A2D; Bha                 |
|`U+0A2E`   | Letter           | CONSONANT         | _null_                     | &#x0A2E; Ma                  |
|`U+0A2F`   | Letter           | CONSONANT         | _null_                     | &#x0A2F; Ya                  |
| | | | |
|`U+0A30`   | Letter           | CONSONANT         | _null_                     | &#x0A30; Ra                  |
|`U+0A31`   | _unassigned_     |                   |                            |                              |
|`U+0A32`   | Letter           | CONSONANT         | _null_                     | &#x0A32; La                  |
|`U+0A33`   | Letter           | CONSONANT         | _null_                     | &#x0A33; Lla                 |
|`U+0A34`   | _unassigned_     |                   |                            |                              |
|`U+0A35`   | Letter           | CONSONANT         | _null_                     | &#x0A35; Va                  |
|`U+0A36`   | Letter           | CONSONANT         | _null_                     | &#x0A36; Sha                 |
|`U+0A37`   | _unassigned_     |                   |                            |                              |
|`U+0A38`   | Letter           | CONSONANT         | _null_                     | &#x0A38; Sa                  |
|`U+0A39`   | Letter           | CONSONANT         | _null_                     | &#x0A39; Ha                  |
|`U+0A3A`   | _unassigned_     |                   |                            |                              |
|`U+0A3B`   | _unassigned_     |                   |                            |                              |
|`U+0A3C`   | Mark [Mn]        | NUKTA             | BOTTOM_POSITION            | &#x0A3C; Nukta               |
|`U+0A3D`   | _unassigned_     |                   |                            |                              |
|`U+0A3E`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0A3E; Sign Aa             |
|`U+0A3F`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x0A3F; Sign I              |
| | | | |
|`U+0A40`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0A40; Sign Ii             |
|`U+0A41`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0A41; Sign U              |
|`U+0A42`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0A42; Sign Uu             |
|`U+0A43`   | _unassigned_     |                   |                            |                              |
|`U+0A44`   | _unassigned_     |                   |                            |                              |
|`U+0A45`   | _unassigned_     |                   |                            |                              |
|`U+0A46`   | _unassigned_     |                   |                            |                              |
|`U+0A47`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0A47; Sign Ee             |
|`U+0A48`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0A48; Sign Ai             |
|`U+0A49`   | _unassigned_     |                   |                            |                              |
|`U+0A4A`   | _unassigned_     |                   |                            |                              |
|`U+0A4B`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0A4B; Sign Oo             |
|`U+0A4C`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0A4C; Sign Au             |
|`U+0A4D`   | Mark [Mn]        | VIRAMA            | BOTTOM_POSITION            | &#x0A4D; Virama              |
|`U+0A4E`   | _unassigned_     |                   |                            |                              |
|`U+0A4F`   | _unassigned_     |                   |                            |                              |
| | | | |
|`U+0A50`   | _unassigned_     |                   |                            |                              |
|`U+0A51`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x0A51; Udaat               |
|`U+0A52`   | _unassigned_     |                   |                            |                              |
|`U+0A53`   | _unassigned_     |                   |                            |                              |
|`U+0A54`   | _unassigned_     |                   |                            |                              |
|`U+0A55`   | _unassigned_     |                   |                            |                              |
|`U+0A56`   | _unassigned_     |                   |                            |                              |
|`U+0A57`   | _unassigned_     |                   |                            |                              |
|`U+0A58`   | _unassigned_     |                   |                            |                              |
|`U+0A59`   | Letter           | CONSONANT         | _null_                     | &#x0A59; Khha                |
|`U+0A5A`   | Letter           | CONSONANT         | _null_                     | &#x0A5A; Ghha                |
|`U+0A5B`   | Letter           | CONSONANT         | _null_                     | &#x0A5B; Za                  |
|`U+0A5C`   | Letter           | CONSONANT         | _null_                     | &#x0A5C; Rra                 |
|`U+0A5D`   | _unassigned_     |                   |                            |                              |
|`U+0A5E`   | Letter           | CONSONANT         | _null_                     | &#x0A5E; Fa                  |
|`U+0A5F`   | _unassigned_     |                   |                            |                              |
| | | | |
|`U+0A60`   | _unassigned_     |                   |                            |                              |
|`U+0A61`   | _unassigned_     |                   |                            |                              |
|`U+0A62`   | _unassigned_     |                   |                            |                              |
|`U+0A63`   | _unassigned_     |                   |                            |                              |
|`U+0A64`   | _unassigned_     |                   |                            |                              |
|`U+0A65`   | _unassigned_     |                   |                            |                              |
|`U+0A66`   | Number           | NUMBER            | _null_                     | &#x0A66; Digit Zero          |
|`U+0A67`   | Number           | NUMBER            | _null_                     | &#x0A67; Digit One           |
|`U+0A68`   | Number           | NUMBER            | _null_                     | &#x0A68; Digit Two           |
|`U+0A69`   | Number           | NUMBER            | _null_                     | &#x0A69; Digit Three         |
|`U+0A6A`   | Number           | NUMBER            | _null_                     | &#x0A6A; Digit Four          |
|`U+0A6B`   | Number           | NUMBER            | _null_                     | &#x0A6B; Digit Five          |
|`U+0A6C`   | Number           | NUMBER            | _null_                     | &#x0A6C; Digit Six           |
|`U+0A6D`   | Number           | NUMBER            | _null_                     | &#x0A6D; Digit Seven         |
|`U+0A6E`   | Number           | NUMBER            | _null_                     | &#x0A6E; Digit Eight         |
|`U+0A6F`   | Number           | NUMBER            | _null_                     | &#x0A6F; Digit Nine          |
| | | | |
|`U+0A70`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0A70; Tippi               |
|`U+0A71`   | Mark [Mn]        | GEMINATION_MARK   | TOP_POSITION               | &#x0A71; Addak               |
|`U+0A72`   | Letter           | CONSONANT         | _null_                     | &#x0A72; Iri                 |
|`U+0A73`   | Letter           | CONSONANT         | _null_                     | &#x0A73; Ura                 |
|`U+0A74`   | Letter           | _null_            | _null_                     | &#x0A74; Ek Onkar            |
|`U+0A75`   | Mark [Mn]        | CONSONANT_MEDIAL  | BOTTOM_POSITION            | &#x0A75; Yakash              |
|`U+0A76`   | Punctuation      | _null_            | _null_                     | &#x0A76; Abbreviation Sign   |
|`U+0A77`   | _unassigned_     |                   |                            |                              |
|`U+0A78`   | _unassigned_     |                   |                            |                              |
|`U+0A79`   | _unassigned_     |                   |                            |                              |
|`U+0A7A`   | _unassigned_     |                   |                            |                              |
|`U+0A7B`   | _unassigned_     |                   |                            |                              |
|`U+0A7C`   | _unassigned_     |                   |                            |                              |
|`U+0A7D`   | _unassigned_     |                   |                            |                              |
|`U+0A7E`   | _unassigned_     |                   |                            |                              |
|`U+0A7F`   | _unassigned_     |                   |                            |                              |
:::


## Vedic Extensions character table ##

Sanskrit runs written in the Gurmukhi script may also include
characters from the Vedic Extensions block. These characters should be
classified as follows.

> Note: See the [Vedic Extensions](../opentype-shaping-vedic-extensions.md) 
> document for additional information.


:::{table} Vedic Extensions character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1CD0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD0; Tone Karshana       |
|`U+1CD1`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD1; Tone Shara          |
|`U+1CD2`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD2; Tone Prenkha        |
|`U+1CD3`   | Punctuation      | _null_            | _null_                     | &#x1CD3; Sign Nihshvasa      |
|`U+1CD4`   | Mark [Mn]        | CANTILLATION      | OVERSTRUCK                 | &#x1CD4; Tone Midline Svarita |
|`U+1CD5`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD5; Tone Aggravated Independent Svarita |
|`U+1CD6`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD6; Tone Independent Svarita |
|`U+1CD7`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD7; Tone Kathaka Independent Svarita |
|`U+1CD8`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD8; Tone Candra Below   |
|`U+1CD9`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD9; Tone Kathaka Independent Svarita Schroeder |
|`U+1CDA`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDA; Tone Double Svarita |
|`U+1CDB`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDB; Tone Triple Svarita |
|`U+1CDC`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDC; Tone Kathaka Anudatta |
|`U+1CDD`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDD; Tone Dot Below      |
|`U+1CDE`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDE; Tone Two Dots Below |
|`U+1CDF`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDF; Tone Three Dots Below |
| | | | |																		
|`U+1CE0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CE0; Tone Rigvedic Kashmiri Independent Svarita |
|`U+1CE1`   | Mark [Mc]        | CANTILLATION      | RIGHT_POSITION             | &#x1CE1; Tone Atharvavedic Independent Svarita |
|`U+1CE2`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE2; Sign Visarga Svarita |
|`U+1CE3`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE3; Sign Visarga Udatta |
|`U+1CE4`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE4; Sign Reversed Visarga Udatta |
|`U+1CE5`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE5; Sign Visarga Anudatta |
|`U+1CE6`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE6; Sign Reversed Visarga Anudatta |
|`U+1CE7`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE7; Sign Visarga Udatta With Tail |
|`U+1CE8`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE8; Sign Visarga Anudatta With Tail |
|`U+1CE9`   | Letter           | SYMBOL            | _null_                     | &#x1CE9; Sign Anusvara Antargomukha |
|`U+1CEA`   | Letter           | _null_            | _null_                     | &#x1CEA; Sign Anusvara Bahirgomukha |
|`U+1CEB`   | Letter           | _null_            | _null_                     | &#x1CEB; Sign Anusvara Vamagomukha |
|`U+1CEC`   | Letter           | SYMBOL            | _null_                     | &#x1CEC; Sign Anusvara Vamagomukha With Tail |
|`U+1CED`   | Mark [Mn]        | AVAGRAHA          | BOTTOM_POSITION            | &#x1CED; Sign Tiryak         |
|`U+1CEE`   | Letter           | SYMBOL            | _null_                     | &#x1CEE; Sign Hexiform Long Anusvara |
|`U+1CEF`   | Letter           | _null_            | _null_                     | &#x1CEF; Sign Long Anusvara  |
| | | | |																		
|`U+1CF0`   | Letter           | _null_            | _null_                     | &#x1CF0; Sign Rthang Long Anusvara |
|`U+1CF1`   | Letter           | SYMBOL            | _null_                     | &#x1CF1; Sign Anusvara Ubhayato Mukha |
|`U+1CF2`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF2; Sign Ardhavisarga   |
|`U+1CF3`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF3; Sign Rotated Ardhavisarga |
|`U+1CF4`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CF4; Tone Candra Above   |
|`U+1CF5`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF5; Sign Jihvamuliya    |
|`U+1CF6`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF6; Sign Upadhmaniya    |
|`U+1CF7`   | Mark [Mc]        | _null_            | _null_                     | &#x1CF7; Sign Atikrama       |
|`U+1CF8`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF8; Tone Ring Above     |
|`U+1CF9`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF9; Tone Double Ring Above |
|`U+1CFA`   | Letter           | PLACEHOLDER       | _null_                     | &#x1CFA; Sign Double Anusvara Antargomukha |
|`U+1CFB`   | _unassigned_     |                   |                            |                              |
|`U+1CFC`   | _unassigned_     |                   |                            |                              |
|`U+1CFD`   | _unassigned_     |                   |                            |                              |
|`U+1CFE`   | _unassigned_     |                   |                            |                              |
|`U+1CFF`   | _unassigned_     |                   |                            |                              |
:::


## Miscellaneous character table ##

In addition to general punctuation, runs of Gurmukhi text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block. Gurmukhi text can also incorporate the udatta
(`U+0951`) and anudatta (`U+0952`) signs from the Devanagari block.

:::{table} Additional punctuation character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+0951`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x0951; Udatta              |
|`U+0952`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x0952; Anudatta            |
|`U+0964`   | Punctuation      | _null_            | _null_                     | &#x0964; Danda               |
|`U+0965`   | Punctuation      | _null_            | _null_                     | &#x0965; Double Danda        |
:::


Other important characters that may be encountered when shaping runs
of Gurmukhi text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2013; En dash               |
|`U+2014`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
:::


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a "_Consonant_,Halant,_Consonant_" sequence. The
sequence "_Consonant_,Halant,ZWJ,_Consonant_" blocks the formation of
a conjunct between the two consonants. 

Note, however, that the "_Consonant_,Halant" subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead. The
sequence "_Consonant_,Halant,ZWNJ,_Consonant_" should produce the
first consonant in its standard form, followed by an explicit
"Halant".

A secondary usage of the zero-width joiner is to prevent the formation of
"Reph". An initial "Ra,Halant,ZWJ" sequence should not produce a "Reph",
whereas an initial "Ra,Halant" sequence without the zero-width joiner
would.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match "NBSP,ZWJ,Halant,_Consonant_", "NBSP,_mark_", or "NBSP,_matra_".


================================================
FILE: character-tables/character-tables-hangul.md
================================================
# Hangul character tables #

This document lists the per-character shaping information needed to
[shape Hangul text](../opentype-shaping-hangul.md).

**Contents**

  - [Hangul Jamo character table](#hangul-jamo-character-table)
  - [Hangul Jamo Extended-A character table](#hangul-jamo-extended-a-character-table)
  - [Hangul Jamo Extended-B character table](#hangul-jamo-extended-b-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)
  
  - [Hangul Syllables: summary](#hangul-syllables-character-table)
      

## Hangul Jamo character table ##

Hangul Jamo should be classified as in the following
table. Codepoints in the Hangul Jamo block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

The _Jamo type_ column indicates the syllable-component type of the
jamo: "L" for leading consonants (choseong), "V" for vowels
(jungseong), and "T" for trailing consonants (jongseong).

In addition, the filler codepoints `U+115F` (Choseong Filler) and `U+1160`
(Jungseong Filler) are classified as type "Lf" and "Vf", respectively.

The _Composing_ column indicates whether or not the jamo is capable of
being canonically composed into a syllable included in the Hangul
Syllables block. Jamo in the modern Korean alphabet are designated
`YES`, while fillers and archaic jamo from the old Korean alphabet are
designated `NO`.


:::{table} Hangul Jamo character table

| Codepoint | Unicode category | Jamo type | Composing | Glyph                            |
|:----------|:-----------------|:----------|:----------|:---------------------------------|
|`U+1100`   | Letter           | L         | YES       | &#x1100; Kiyeok                  |
|`U+1101`   | Letter           | L         | YES       | &#x1101; Ssangkiyeok             |
|`U+1102`   | Letter           | L         | YES       | &#x1102; Nieun                   |
|`U+1103`   | Letter           | L         | YES       | &#x1103; Tikeut                  |
|`U+1104`   | Letter           | L         | YES       | &#x1104; Ssangtikeut             |
|`U+1105`   | Letter           | L         | YES       | &#x1105; Rieul                   |
|`U+1106`   | Letter           | L         | YES       | &#x1106; Mieum                   |
|`U+1107`   | Letter           | L         | YES       | &#x1107; Pieup                   |
|`U+1108`   | Letter           | L         | YES       | &#x1108; Ssangpieup              |
|`U+1109`   | Letter           | L         | YES       | &#x1109; Sios                    |
|`U+110A`   | Letter           | L         | YES       | &#x110A; Ssangsios               |
|`U+110B`   | Letter           | L         | YES       | &#x110B; Ieung                   |
|`U+110C`   | Letter           | L         | YES       | &#x110C; Cieuc                   |
|`U+110D`   | Letter           | L         | YES       | &#x110D; Ssangcieuc              |
|`U+110E`   | Letter           | L         | YES       | &#x110E; Chieuch                 |
|`U+110F`   | Letter           | L         | YES       | &#x110F; Khieukh                 |
| | | | | | 
|`U+1110`   | Letter           | L         | YES       | &#x1110; Thieuth                 |
|`U+1111`   | Letter           | L         | YES       | &#x1111; Phieuph                 |
|`U+1112`   | Letter           | L         | YES       | &#x1112; Hieuh                   |
|`U+1113`   | Letter           | L         | NO        | &#x1113; Nieun-Kiyeok            |
|`U+1114`   | Letter           | L         | NO        | &#x1114; Ssangnieun              |
|`U+1115`   | Letter           | L         | NO        | &#x1115; Nieun-Tikeut            |
|`U+1116`   | Letter           | L         | NO        | &#x1116; Nieun-Pieup             |
|`U+1117`   | Letter           | L         | NO        | &#x1117; Tikeut-Kiyeok           |
|`U+1118`   | Letter           | L         | NO        | &#x1118; Rieul-Nieun             |
|`U+1119`   | Letter           | L         | NO        | &#x1119; Ssangrieul              |
|`U+111A`   | Letter           | L         | NO        | &#x111A; Rieul-Hieuh             |
|`U+111B`   | Letter           | L         | NO        | &#x111B; Kapyeounrieul           |
|`U+111C`   | Letter           | L         | NO        | &#x111C; Mieum-Pieup             |
|`U+111D`   | Letter           | L         | NO        | &#x111D; Kapyeounmieum           |
|`U+111E`   | Letter           | L         | NO        | &#x111E; Pieup-Kiyeok            |
|`U+111F`   | Letter           | L         | NO        | &#x111F; Pieup-Nieun             |
| | | | | |
|`U+1120`   | Letter           | L         | NO        | &#x1120; Pieup-Tikeut            |
|`U+1121`   | Letter           | L         | NO        | &#x1121; Pieup-Sios              |
|`U+1122`   | Letter           | L         | NO        | &#x1122; Pieup-Sios-Kiyeok       |
|`U+1123`   | Letter           | L         | NO        | &#x1123; Pieup-Sios-Tikeut       |
|`U+1124`   | Letter           | L         | NO        | &#x1124; Pieup-Sios-Pieup        |
|`U+1125`   | Letter           | L         | NO        | &#x1125; Pieup-Ssangsios         |
|`U+1126`   | Letter           | L         | NO        | &#x1126; Pieup-Sios-Cieuc        |
|`U+1127`   | Letter           | L         | NO        | &#x1127; Pieup-Cieuc             |
|`U+1128`   | Letter           | L         | NO        | &#x1128; Pieup-Chieuch           |
|`U+1129`   | Letter           | L         | NO        | &#x1129; Pieup-Thieuth           |
|`U+112A`   | Letter           | L         | NO        | &#x112A; Pieup-Phieuph           |
|`U+112B`   | Letter           | L         | NO        | &#x112B; Kapyeounpieup           |
|`U+112C`   | Letter           | L         | NO        | &#x112C; Kapyeounssangpieup      |
|`U+112D`   | Letter           | L         | NO        | &#x112D; Sios-Kiyeok             |
|`U+112E`   | Letter           | L         | NO        | &#x112E; Sios-Nieun              |
|`U+112F`   | Letter           | L         | NO        | &#x112F; Sios-Tikeut             |
| | | | | |
|`U+1130`   | Letter           | L         | NO        | &#x1130; Sios-Rieul              |
|`U+1131`   | Letter           | L         | NO        | &#x1131; Sios-Mieum              |
|`U+1132`   | Letter           | L         | NO        | &#x1132; Sios-Pieup              |
|`U+1133`   | Letter           | L         | NO        | &#x1133; Sios-Pieup-Kiyeok       |
|`U+1134`   | Letter           | L         | NO        | &#x1134; Sios-Ssangsios          |
|`U+1135`   | Letter           | L         | NO        | &#x1135; Sios-Ieung              |
|`U+1136`   | Letter           | L         | NO        | &#x1136; Sios-Cieuc              |
|`U+1137`   | Letter           | L         | NO        | &#x1137; Sios-Chieuch            |
|`U+1138`   | Letter           | L         | NO        | &#x1138; Sios-Khieukh            |
|`U+1139`   | Letter           | L         | NO        | &#x1139; Sios-Thieuth            |
|`U+113A`   | Letter           | L         | NO        | &#x113A; Sios-Phieuph            |
|`U+113B`   | Letter           | L         | NO        | &#x113B; Sios-Hieuh              |
|`U+113C`   | Letter           | L         | NO        | &#x113C; Chitueumsios            |
|`U+113D`   | Letter           | L         | NO        | &#x113D; Chitueumssangsios       |
|`U+113E`   | Letter           | L         | NO        | &#x113E; Ceongchieumsios         |
|`U+113F`   | Letter           | L         | NO        | &#x113F; Ceongchieumssangsios    |
| | | | | |
|`U+1140`   | Letter           | L         | NO        | &#x1140; Pansios                 |
|`U+1141`   | Letter           | L         | NO        | &#x1141; Ieung-Kiyeok            |
|`U+1142`   | Letter           | L         | NO        | &#x1142; Ieung-Tikeut            |
|`U+1143`   | Letter           | L         | NO        | &#x1143; Ieung-Mieum             |
|`U+1144`   | Letter           | L         | NO        | &#x1144; Ieung-Pieup             |
|`U+1145`   | Letter           | L         | NO        | &#x1145; Ieung-Sios              |
|`U+1146`   | Letter           | L         | NO        | &#x1146; Ieung-Pansios           |
|`U+1147`   | Letter           | L         | NO        | &#x1147; Ssangieung              |
|`U+1148`   | Letter           | L         | NO        | &#x1148; Ieung-Cieuc             |
|`U+1149`   | Letter           | L         | NO        | &#x1149; Ieung-Chieuch           |
|`U+114A`   | Letter           | L         | NO        | &#x114A; Ieung-Thieuth           |
|`U+114B`   | Letter           | L         | NO        | &#x114B; Ieung-Phieuph           |
|`U+114C`   | Letter           | L         | NO        | &#x114C; Yesieung                |
|`U+114D`   | Letter           | L         | NO        | &#x114D; Cieuc-Ieung             |
|`U+114E`   | Letter           | L         | NO        | &#x114E; Chitueumcieuc           |
|`U+114F`   | Letter           | L         | NO        | &#x114F; Chitueumssangcieuc      |
| | | | | |
|`U+1150`   | Letter           | L         | NO        | &#x1150; Ceongchieumcieuc        |
|`U+1151`   | Letter           | L         | NO        | &#x1151; Ceongchieumssangcieuc   |
|`U+1152`   | Letter           | L         | NO        | &#x1152; Chieuch-Khieukh         |
|`U+1153`   | Letter           | L         | NO        | &#x1153; Chieuch-Hieuh           |
|`U+1154`   | Letter           | L         | NO        | &#x1154; Chitueumchieuch         |
|`U+1155`   | Letter           | L         | NO        | &#x1155; Ceongchieumchieuch      |
|`U+1156`   | Letter           | L         | NO        | &#x1156; Phieuph-Pieup           |
|`U+1157`   | Letter           | L         | NO        | &#x1157; Kapyeounphieuph         |
|`U+1158`   | Letter           | L         | NO        | &#x1158; Ssanghieuh              |
|`U+1159`   | Letter           | L         | NO        | &#x1159; Yeorinhieuh             |
|`U+115A`   | Letter           | L         | NO        | &#x115A; Kiyeok-Tikeut           |
|`U+115B`   | Letter           | L         | NO        | &#x115B; Nieun-Sios              |
|`U+115C`   | Letter           | L         | NO        | &#x115C; Nieun-Cieuc             |
|`U+115D`   | Letter           | L         | NO        | &#x115D; Nieun-Hieuh             |
|`U+115E`   | Letter           | L         | NO        | &#x115E; Tikeut-Rieul            |
|`U+115F`   | Letter           | Lf        | NO        | &#x115F; Choseong Filler         |
| | | | | |
|`U+1160`   | Letter           | Vf        | NO        | &#x1160; Jungseong Filler        |
|`U+1161`   | Letter           | V         | YES       | &#x1161; A                       |
|`U+1162`   | Letter           | V         | YES       | &#x1162; Ae                      |
|`U+1163`   | Letter           | V         | YES       | &#x1163; Ya                      |
|`U+1164`   | Letter           | V         | YES       | &#x1164; Yae                     |
|`U+1165`   | Letter           | V         | YES       | &#x1165; Eo                      |
|`U+1166`   | Letter           | V         | YES       | &#x1166; E                       |
|`U+1167`   | Letter           | V         | YES       | &#x1167; Yeo                     |
|`U+1168`   | Letter           | V         | YES       | &#x1168; Ye                      |
|`U+1169`   | Letter           | V         | YES       | &#x1169; O                       |
|`U+116A`   | Letter           | V         | YES       | &#x116A; Wa                      |
|`U+116B`   | Letter           | V         | YES       | &#x116B; Wae                     |
|`U+116C`   | Letter           | V         | YES       | &#x116C; Oe                      |
|`U+116D`   | Letter           | V         | YES       | &#x116D; Yo                      |
|`U+116E`   | Letter           | V         | YES       | &#x116E; U                       |
|`U+116F`   | Letter           | V         | YES       | &#x116F; Weo                     |
| | | | | |
|`U+1170`   | Letter           | V         | YES       | &#x1170; We                      |
|`U+1171`   | Letter           | V         | YES       | &#x1171; Wi                      |
|`U+1172`   | Letter           | V         | YES       | &#x1172; Yu                      |
|`U+1173`   | Letter           | V         | YES       | &#x1173; Eu                      |
|`U+1174`   | Letter           | V         | YES       | &#x1174; Yi                      |
|`U+1175`   | Letter           | V         | YES       | &#x1175; I                       |
|`U+1176`   | Letter           | V         | NO        | &#x1176; A-O                     |
|`U+1177`   | Letter           | V         | NO        | &#x1177; A-U                     |
|`U+1178`   | Letter           | V         | NO        | &#x1178; Ya-O                    |
|`U+1179`   | Letter           | V         | NO        | &#x1179; Ya-Yo                   |
|`U+117A`   | Letter           | V         | NO        | &#x117A; Eo-O                    |
|`U+117B`   | Letter           | V         | NO        | &#x117B; Eo-U                    |
|`U+117C`   | Letter           | V         | NO        | &#x117C; Eo-Eu                   |
|`U+117D`   | Letter           | V         | NO        | &#x117D; Yeo-O                   |
|`U+117E`   | Letter           | V         | NO        | &#x117E; Yeo-U                   |
|`U+117F`   | Letter           | V         | NO        | &#x117F; O-Eo                    |
| | | | | |
|`U+1180`   | Letter           | V         | NO        | &#x1180; O-E                     |
|`U+1181`   | Letter           | V         | NO        | &#x1181; O-Ye                    |
|`U+1182`   | Letter           | V         | NO        | &#x1182; O-O                     |
|`U+1183`   | Letter           | V         | NO        | &#x1183; O-U                     |
|`U+1184`   | Letter           | V         | NO        | &#x1184; Yo-Ya                   |
|`U+1185`   | Letter           | V         | NO        | &#x1185; Yo-Yae                  |
|`U+1186`   | Letter           | V         | NO        | &#x1186; Yo-Yeo                  |
|`U+1187`   | Letter           | V         | NO        | &#x1187; Yo-O                    |
|`U+1188`   | Letter           | V         | NO        | &#x1188; Yo-I                    |
|`U+1189`   | Letter           | V         | NO        | &#x1189; U-A                     |
|`U+118A`   | Letter           | V         | NO        | &#x118A; U-Ae                    |
|`U+118B`   | Letter           | V         | NO        | &#x118B; U-Eo-Eu                 |
|`U+118C`   | Letter           | V         | NO        | &#x118C; U-Ye                    |
|`U+118D`   | Letter           | V         | NO        | &#x118D; U-U                     |
|`U+118E`   | Letter           | V         | NO        | &#x118E; Yu-A                    |
|`U+118F`   | Letter           | V         | NO        | &#x118F; Yu-Eo                   |
| | | | | |
|`U+1190`   | Letter           | V         | NO        | &#x1190; Yu-E                    |
|`U+1191`   | Letter           | V         | NO        | &#x1191; Yu-Yeo                  |
|`U+1192`   | Letter           | V         | NO        | &#x1192; Yu-Ye                   |
|`U+1193`   | Letter           | V         | NO        | &#x1193; Yu-U                    |
|`U+1194`   | Letter           | V         | NO        | &#x1194; Yu-I                    |
|`U+1195`   | Letter           | V         | NO        | &#x1195; Eu-U                    |
|`U+1196`   | Letter           | V         | NO        | &#x1196; Eu-Eu                   |
|`U+1197`   | Letter           | V         | NO        | &#x1197; Yi-U                    |
|`U+1198`   | Letter           | V         | NO        | &#x1198; I-A                     |
|`U+1199`   | Letter           | V         | NO        | &#x1199; I-Ya                    |
|`U+119A`   | Letter           | V         | NO        | &#x119A; I-O                     |
|`U+119B`   | Letter           | V         | NO        | &#x119B; I-U                     |
|`U+119C`   | Letter           | V         | NO        | &#x119C; I-Eu                    |
|`U+119D`   | Letter           | V         | NO        | &#x119D; I-Araea                 |
|`U+119E`   | Letter           | V         | NO        | &#x119E; Araea                   |
|`U+119F`   | Letter           | V         | NO        | &#x119F; Araea-Eo                |
| | | | | |
|`U+11A0`   | Letter           | V         | NO        | &#x11A0; Araea-U                 |
|`U+11A1`   | Letter           | V         | NO        | &#x11A1; Araea-I                 |
|`U+11A2`   | Letter           | V         | NO        | &#x11A2; Ssangaraea              |
|`U+11A3`   | Letter           | V         | NO        | &#x11A3; A-Eu                    |
|`U+11A4`   | Letter           | V         | NO        | &#x11A4; Ya-U                    |
|`U+11A5`   | Letter           | V         | NO        | &#x11A5; Yeo-Ya                  |
|`U+11A6`   | Letter           | V         | NO        | &#x11A6; O-Ya                    |
|`U+11A7`   | Letter           | V         | NO        | &#x11A7; O-Yae                   |
|`U+11A8`   | Letter           | T         | YES       | &#x11A8; Kiyeok                  |
|`U+11A9`   | Letter           | T         | YES       | &#x11A9; Ssangkiyeok             |
|`U+11AA`   | Letter           | T         | YES       | &#x11AA; Kiyeok-Sios             |
|`U+11AB`   | Letter           | T         | YES       | &#x11AB; Nieun                   |
|`U+11AC`   | Letter           | T         | YES       | &#x11AC; Nieun-Cieuc             |
|`U+11AD`   | Letter           | T         | YES       | &#x11AD; Nieun-Hieuh             |
|`U+11AE`   | Letter           | T         | YES       | &#x11AE; Tikeut                  |
|`U+11AF`   | Letter           | T         | YES       | &#x11AF; Rieul                   |
| | | | | |
|`U+11B0`   | Letter           | T         | YES       | &#x11B0; Rieul-Kiyeok            |
|`U+11B1`   | Letter           | T         | YES       | &#x11B1; Rieul-Mieum             |
|`U+11B2`   | Letter           | T         | YES       | &#x11B2; Rieul-Pieup             |
|`U+11B3`   | Letter           | T         | YES       | &#x11B3; Rieul-Sios              |
|`U+11B4`   | Letter           | T         | YES       | &#x11B4; Rieul-Thieuth           |
|`U+11B5`   | Letter           | T         | YES       | &#x11B5; Rieul-Phieuph           |
|`U+11B6`   | Letter           | T         | YES       | &#x11B6; Rieul-Hieuh             |
|`U+11B7`   | Letter           | T         | YES       | &#x11B7; Mieum                   |
|`U+11B8`   | Letter           | T         | YES       | &#x11B8; Pieup                   |
|`U+11B9`   | Letter           | T         | YES       | &#x11B9; Pieup-Sios              |
|`U+11BA`   | Letter           | T         | YES       | &#x11BA; Sios                    |
|`U+11BB`   | Letter           | T         | YES       | &#x11BB; Ssangsios               |
|`U+11BC`   | Letter           | T         | YES       | &#x11BC; Ieung                   |
|`U+11BD`   | Letter           | T         | YES       | &#x11BD; Cieuc                   |
|`U+11BE`   | Letter           | T         | YES       | &#x11BE; Chieuch                 |
|`U+11BF`   | Letter           | T         | YES       | &#x11BF; Khieukh                 |
| | | | | |
|`U+11C0`   | Letter           | T         | YES       | &#x11C0; Thieuth                 |
|`U+11C1`   | Letter           | T         | YES       | &#x11C1; Phieuph                 |
|`U+11C2`   | Letter           | T         | YES       | &#x11C2; Hieuh                   |
|`U+11C3`   | Letter           | T         | NO        | &#x11C3; Kiyeok-Rieul            |
|`U+11C4`   | Letter           | T         | NO        | &#x11C4; Kiyeok-Sios-Kiyeok      |
|`U+11C5`   | Letter           | T         | NO        | &#x11C5; Nieun-Kiyeok            |
|`U+11C6`   | Letter           | T         | NO        | &#x11C6; Nieun-Tikeut            |
|`U+11C7`   | Letter           | T         | NO        | &#x11C7; Nieun-Sios              |
|`U+11C8`   | Letter           | T         | NO        | &#x11C8; Nieun-Pansios           |
|`U+11C9`   | Letter           | T         | NO        | &#x11C9; Nieun-Thieuth           |
|`U+11CA`   | Letter           | T         | NO        | &#x11CA; Tikeut-Kiyeok           |
|`U+11CB`   | Letter           | T         | NO        | &#x11CB; Tikeut-Rieul            |
|`U+11CC`   | Letter           | T         | NO        | &#x11CC; Rieul-Kiyeok-Sios       |
|`U+11CD`   | Letter           | T         | NO        | &#x11CD; Rieul-Nieun             |
|`U+11CE`   | Letter           | T         | NO        | &#x11CE; Rieul-Tikeut            |
|`U+11CF`   | Letter           | T         | NO        | &#x11CF; Rieul-Tikeut-Hieuh      |
| | | | | |
|`U+11D0`   | Letter           | T         | NO        | &#x11D0; Ssangrieul              |
|`U+11D1`   | Letter           | T         | NO        | &#x11D1; Rieul-Mieum-Kiyeok      |
|`U+11D2`   | Letter           | T         | NO        | &#x11D2; Rieul-Mieum-Sios        |
|`U+11D3`   | Letter           | T         | NO        | &#x11D3; Rieul-Pieup-Sios        |
|`U+11D4`   | Letter           | T         | NO        | &#x11D4; Rieul-Pieup-Hieuh       |
|`U+11D5`   | Letter           | T         | NO        | &#x11D5; Rieul-Kapyeounpieup     |
|`U+11D6`   | Letter           | T         | NO        | &#x11D6; Rieul-Ssangsios         |
|`U+11D7`   | Letter           | T         | NO        | &#x11D7; Rieul-Pansios           |
|`U+11D8`   | Letter           | T         | NO        | &#x11D8; Rieul-Khieukh           |
|`U+11D9`   | Letter           | T         | NO        | &#x11D9; Rieul-Yeorinhieuh       |
|`U+11DA`   | Letter           | T         | NO        | &#x11DA; Mieum-Kiyeok            |
|`U+11DB`   | Letter           | T         | NO        | &#x11DB; Mieum-Rieul             |
|`U+11DC`   | Letter           | T         | NO        | &#x11DC; Mieum-Pieup             |
|`U+11DD`   | Letter           | T         | NO        | &#x11DD; Mieum-Sios              |
|`U+11DE`   | Letter           | T         | NO        | &#x11DE; Mieum-Ssangsios         |
|`U+11DF`   | Letter           | T         | NO        | &#x11DF; Mieum-Pansios           |
| | | | | |
|`U+11E0`   | Letter           | T         | NO        | &#x11E0; Mieum-Chieuch           |
|`U+11E1`   | Letter           | T         | NO        | &#x11E1; Mieum-Hieuh             |
|`U+11E2`   | Letter           | T         | NO        | &#x11E2; Kapyeounmieum           |
|`U+11E3`   | Letter           | T         | NO        | &#x11E3; Pieup-Rieul             |
|`U+11E4`   | Letter           | T         | NO        | &#x11E4; Pieup-Phieuph           |
|`U+11E5`   | Letter           | T         | NO        | &#x11E5; Pieup-Hieuh             |
|`U+11E6`   | Letter           | T         | NO        | &#x11E6; Kapyeounpieup           |
|`U+11E7`   | Letter           | T         | NO        | &#x11E7; Sios-Kiyeok             |
|`U+11E8`   | Letter           | T         | NO        | &#x11E8; Sios-Tikeut             |
|`U+11E9`   | Letter           | T         | NO        | &#x11E9; Sios-Rieul              |
|`U+11EA`   | Letter           | T         | NO        | &#x11EA; Sios-Pieup              |
|`U+11EB`   | Letter           | T         | NO        | &#x11EB; Pansios                 |
|`U+11EC`   | Letter           | T         | NO        | &#x11EC; Ieung-Kiyeok            |
|`U+11ED`   | Letter           | T         | NO        | &#x11ED; Ieung-Ssangkiyeok       |
|`U+11EE`   | Letter           | T         | NO        | &#x11EE; Ssangieung              |
|`U+11EF`   | Letter           | T         | NO        | &#x11EF; Ieung-Khieukh           |
| | | | | |
|`U+11F0`   | Letter           | T         | NO        | &#x11F0; Yesieung                |
|`U+11F1`   | Letter           | T         | NO        | &#x11F1; Yesieung-Sios           |
|`U+11F2`   | Letter           | T         | NO        | &#x11F2; Yesieung-Pansios        |
|`U+11F3`   | Letter           | T         | NO        | &#x11F3; Phieuph-Pieup           |
|`U+11F4`   | Letter           | T         | NO        | &#x11F4; Kapyeounphieuph         |
|`U+11F5`   | Letter           | T         | NO        | &#x11F5; Hieuh-Nieun             |
|`U+11F6`   | Letter           | T         | NO        | &#x11F6; Hieuh-Rieul             |
|`U+11F7`   | Letter           | T         | NO        | &#x11F7; Hieuh-Mieum             |
|`U+11F8`   | Letter           | T         | NO        | &#x11F8; Hieuh-Pieup             |
|`U+11F9`   | Letter           | T         | NO        | &#x11F9; Yeorinhieuh             |
|`U+11FA`   | Letter           | T         | NO        | &#x11FA; Kiyeok-Nieun            |
|`U+11FB`   | Letter           | T         | NO        | &#x11FB; Kiyeok-Pieup            |
|`U+11FC`   | Letter           | T         | NO        | &#x11FC; Kiyeok-Chieuch          |
|`U+11FD`   | Letter           | T         | NO        | &#x11FD; Kiyeok-Khieukh          |
|`U+11FE`   | Letter           | T         | NO        | &#x11FE; Kiyeok-Hieuh            |
|`U+11FF`   | Letter           | T         | NO        | &#x11FF; Ssangnieun              |
:::


## Hangul Jamo Extended-A character table ##

Hangul Jamo Extended-A characters should be classified as in the
following table. Codepoints in the Hangul Jamo Extended-A block with no
assigned meaning are designated as _unassigned_ in the _Unicode
category_ column.

The _Jamo type_ column indicates the syllable-component type of the
jamo. All assigned codepoints in the Hangul Jamo Extended-A block are
classified as type "L" for leading consonants (choseong).


:::{table} Hangul Jamo Extended-A character table

| Codepoint | Unicode category | Jamo type | Composing | Glyph                            |
|:----------|:-----------------|:----------|:----------|:---------------------------------|
|`U+A960`   | Letter           | L         | NO        | &#xA960; Tikeut-Mieum            |
|`U+A961`   | Letter           | L         | NO        | &#xA961; Tikeut-Pieup            |
|`U+A962`   | Letter           | L         | NO        | &#xA962; Tikeut-Sios             |
|`U+A963`   | Letter           | L         | NO        | &#xA963; Tikeut-Cieuc            |
|`U+A964`   | Letter           | L         | NO        | &#xA964; Rieul-Kiyeok            |
|`U+A965`   | Letter           | L         | NO        | &#xA965; Rieul-Ssangkiyeok       |
|`U+A966`   | Letter           | L         | NO        | &#xA966; Rieul-Tikeut            |
|`U+A967`   | Letter           | L         | NO        | &#xA967; Rieul-Ssangtikeut       |
|`U+A968`   | Letter           | L         | NO        | &#xA968; Rieul-Mieum             |
|`U+A969`   | Letter           | L         | NO        | &#xA969; Rieul-Pieup             |
|`U+A96A`   | Letter           | L         | NO        | &#xA96A; Rieul-Ssangpieup        |
|`U+A96B`   | Letter           | L         | NO        | &#xA96B; Rieul-Kapyeounpieup     |
|`U+A96C`   | Letter           | L         | NO        | &#xA96C; Rieul-Sios              |
|`U+A96D`   | Letter           | L         | NO        | &#xA96D; Rieul-Cieuc             |
|`U+A96E`   | Letter           | L         | NO        | &#xA96E; Rieul-Khieukh           |
|`U+A96F`   | Letter           | L         | NO        | &#xA96F; Mieum-Kiyeok            |
| | | | | | 
|`U+A970`   | Letter           | L         | NO        | &#xA970; Mieum-Tikeut            |
|`U+A971`   | Letter           | L         | NO        | &#xA971; Mieum-Sios              |
|`U+A972`   | Letter           | L         | NO        | &#xA972; Pieup-Sios-Thieuth      |
|`U+A973`   | Letter           | L         | NO        | &#xA973; Pieup-Khieukh           |
|`U+A974`   | Letter           | L         | NO        | &#xA974; Pieup-Hieuh             |
|`U+A975`   | Letter           | L         | NO        | &#xA975; Ssangsios-Pieup         |
|`U+A976`   | Letter           | L         | NO        | &#xA976; Ieung-Rieul             |
|`U+A977`   | Letter           | L         | NO        | &#xA977; Ieung-Hieuh             |
|`U+A978`   | Letter           | L         | NO        | &#xA978; Ssangcieuc-Hieuh        |
|`U+A979`   | Letter           | L         | NO        | &#xA979; Ssangthieuth            |
|`U+A97A`   | Letter           | L         | NO        | &#xA97A; Phieuph-Hieuh           |
|`U+A97B`   | Letter           | L         | NO        | &#xA97B; Hieuh-Sios              |
|`U+A97C`   | Letter           | L         | NO        | &#xA97C; Ssangyeorinhieuh        |
|`U+A97D`   | _unassigned_     |           |           |                                  |
|`U+A97E`   | _unassigned_     |           |           |                                  |
|`U+A97F`   | _unassigned_     |           |           |                                  |
:::


## Hangul Jamo Extended-B character table ##

Hangul Jamo Extended-B codepoints should be classified as in the
following table. Codepoints in the Hangul Jamo Extended-B block with no
assigned meaning are designated as _unassigned_ in the _Unicode
category_ column.

The _Jamo type_ column indicates the syllable-component type of the
jamo: "V" for vowels (jungseong) and "T" for trailing consonants (jongseong).


:::{table} Hangul Jamo Extended-B character table

| Codepoint | Unicode category | Jamo type | Composing | Glyph                            |
|:----------|:-----------------|:----------|:----------|:---------------------------------|
|`U+D7B0`   | Letter           | V         | NO        | &#xD7B0; O-Yeo                   |
|`U+D7B1`   | Letter           | V         | NO        | &#xD7B1; O-O-I                   |
|`U+D7B2`   | Letter           | V         | NO        | &#xD7B2; Yo-A                    |
|`U+D7B3`   | Letter           | V         | NO        | &#xD7B3; Yo-Ae                   |
|`U+D7B4`   | Letter           | V         | NO        | &#xD7B4; Yo-Eo                   |
|`U+D7B5`   | Letter           | V         | NO        | &#xD7B5; U-Yeo                   |
|`U+D7B6`   | Letter           | V         | NO        | &#xD7B6; U-I-I                   |
|`U+D7B7`   | Letter           | V         | NO        | &#xD7B7; Yu-Ae                   |
|`U+D7B8`   | Letter           | V         | NO        | &#xD7B8; Yu-O                    |
|`U+D7B9`   | Letter           | V         | NO        | &#xD7B9; Eu-A                    |
|`U+D7BA`   | Letter           | V         | NO        | &#xD7BA; Eu-Eo                   |
|`U+D7BB`   | Letter           | V         | NO        | &#xD7BB; Eu-E                    |
|`U+D7BC`   | Letter           | V         | NO        | &#xD7BC; Eu-O                    |
|`U+D7BD`   | Letter           | V         | NO        | &#xD7BD; I-Ya-O                  |
|`U+D7BE`   | Letter           | V         | NO        | &#xD7BE; I-Yae                   |
|`U+D7BF`   | Letter           | V         | NO        | &#xD7BF; I-Yeo                   |
| | | | | | 
|`U+D7C0`   | Letter           | V         | NO        | &#xD7C0; I-Ye                    |
|`U+D7C1`   | Letter           | V         | NO        | &#xD7C1; I-O-I                   |
|`U+D7C2`   | Letter           | V         | NO        | &#xD7C2; I-Yo                    |
|`U+D7C3`   | Letter           | V         | NO        | &#xD7C3; I-Yu                    |
|`U+D7C4`   | Letter           | V         | NO        | &#xD7C4; I-I                     |
|`U+D7C5`   | Letter           | V         | NO        | &#xD7C5; Araea-A                 |
|`U+D7C6`   | Letter           | V         | NO        | &#xD7C6; Araea-E                 |
|`U+D7C7`   | _unassigned_     |           |           |                                  |
|`U+D7C8`   | _unassigned_     |           |           |                                  |
|`U+D7C9`   | _unassigned_     |           |           |                                  |
|`U+D7CA`   | _unassigned_     |           |           |                                  |
|`U+D7CB`   | Letter           | T         | NO        | &#xD7CB; Nieun-Rieul             |
|`U+D7CC`   | Letter           | T         | NO        | &#xD7CC; Nieun-Chieuch           |
|`U+D7CD`   | Letter           | T         | NO        | &#xD7CD; Ssangtikeut             |
|`U+D7CE`   | Letter           | T         | NO        | &#xD7CE; Ssangtikeut-Pieup       |
|`U+D7CF`   | Letter           | T         | NO        | &#xD7CF; Tikeut-Pieup            |
| | | | | | 
|`U+D7D0`   | Letter           | T         | NO        | &#xD7D0; Tikeut-Sios             |
|`U+D7D1`   | Letter           | T         | NO        | &#xD7D1; Tikeut-Sios-Kiyeok      |
|`U+D7D2`   | Letter           | T         | NO        | &#xD7D2; Tikeut-Cieuc            |
|`U+D7D3`   | Letter           | T         | NO        | &#xD7D3; Tikeut-Chieuch          |
|`U+D7D4`   | Letter           | T         | NO        | &#xD7D4; Tikeut-Thieuth          |
|`U+D7D5`   | Letter           | T         | NO        | &#xD7D5; Rieul-Ssangkiyeok       |
|`U+D7D6`   | Letter           | T         | NO        | &#xD7D6; Rieul-Kiyeok-Hieuh      |
|`U+D7D7`   | Letter           | T         | NO        | &#xD7D7; Ssangrieul-Khieukh      |
|`U+D7D8`   | Letter           | T         | NO        | &#xD7D8; Rieul-Mieum-Hieuh       |
|`U+D7D9`   | Letter           | T         | NO        | &#xD7D9; Rieul-Pieup-Tikeut      |
|`U+D7DA`   | Letter           | T         | NO        | &#xD7DA; Rieul-Pieup-Phieuph     |
|`U+D7DB`   | Letter           | T         | NO        | &#xD7DB; Rieul-Yesieung          |
|`U+D7DC`   | Letter           | T         | NO        | &#xD7DC; Rieul-Yeorinhieuh-Hieuh |
|`U+D7DD`   | Letter           | T         | NO        | &#xD7DD; Kapyeounrieul           |
|`U+D7DE`   | Letter           | T         | NO        | &#xD7DE; Mieum-Nieun             |
|`U+D7DF`   | Letter           | T         | NO        | &#xD7DF; Mieum-Ssangnieun        |
| | | | | | 
|`U+D7E0`   | Letter           | T         | NO        | &#xD7E0; Ssangmieum              |
|`U+D7E1`   | Letter           | T         | NO        | &#xD7E1; Mieum-Pieup-Sios        |
|`U+D7E2`   | Letter           | T         | NO        | &#xD7E2; Mieum-Cieuc             |
|`U+D7E3`   | Letter           | T         | NO        | &#xD7E3; Pieup-Tikeut            |
|`U+D7E4`   | Letter           | T         | NO        | &#xD7E4; Pieup-Rieul-Phieuph     |
|`U+D7E5`   | Letter           | T         | NO        | &#xD7E5; Pieup-Mieum             |
|`U+D7E6`   | Letter           | T         | NO        | &#xD7E6; Ssangpieup              |
|`U+D7E7`   | Letter           | T         | NO        | &#xD7E7; Pieup-Sios-Tikeut       |
|`U+D7E8`   | Letter           | T         | NO        | &#xD7E8; Pieup-Cieuc             |
|`U+D7E9`   | Letter           | T         | NO        | &#xD7E9; Pieup-Chieuch           |
|`U+D7EA`   | Letter           | T         | NO        | &#xD7EA; Sios-Mieum              |
|`U+D7EB`   | Letter           | T         | NO        | &#xD7EB; Sios-Kapyeounpieup      |
|`U+D7EC`   | Letter           | T         | NO        | &#xD7EC; Ssangsios-Kiyeok        |
|`U+D7ED`   | Letter           | T         | NO        | &#xD7ED; Ssangsios-Tikeut        |
|`U+D7EE`   | Letter           | T         | NO        | &#xD7EE; Sios-Pansios            |
|`U+D7EF`   | Letter           | T         | NO        | &#xD7EF; Sios-Cieuc              |
| | | | | | 
|`U+D7F0`   | Letter           | T         | NO        | &#xD7F0; Sios-Chieuch            |
|`U+D7F1`   | Letter           | T         | NO        | &#xD7F1; Sios-Thieuth            |
|`U+D7F2`   | Letter           | T         | NO        | &#xD7F2; Sios-Hieuh              |
|`U+D7F3`   | Letter           | T         | NO        | &#xD7F3; Pansios-Pieup           |
|`U+D7F4`   | Letter           | T         | NO        | &#xD7F4; Pansios-Kapyeounpieup   |
|`U+D7F5`   | Letter           | T         | NO        | &#xD7F5; Yesieung-Mieum          |
|`U+D7F6`   | Letter           | T         | NO        | &#xD7F6; Yesieung-Hieuh          |
|`U+D7F7`   | Letter           | T         | NO        | &#xD7F7; Cieuc-Pieup             |
|`U+D7F8`   | Letter           | T         | NO        | &#xD7F8; Cieuc-Ssangpieup        |
|`U+D7F9`   | Letter           | T         | NO        | &#xD7F9; Ssangcieuc              |
|`U+D7FA`   | Letter           | T         | NO        | &#xD7FA; Phieuph-Sios            |
|`U+D7FB`   | Letter           | T         | NO        | &#xD7FB; Phieuph-Thieuth         |
|`U+D7FC`   | _unassigned_     |           |           |                                  |
|`U+D7FD`   | _unassigned_     |           |           |                                  |
|`U+D7FE`   | _unassigned_     |           |           |                                  |
|`U+D7FF`   | _unassigned_     |           |           |                                  |
:::


## Miscellaneous character table ##

In addition to general punctuation, runs of Hangul text may use
punctuation marks from the CJK Symbols And Punctuation block. 

Of particular note are the single-dot tone mark (single-dot bangjeom)
and double-dot tone mark (double-dot bangjeom), `U+302E` and
`U+302F`. These non-spacing marks are common in Old Korean.


:::{table} Additional punctuation character table

| Codepoint | Unicode category | Jamo type | Composing | Glyph                            |
|:----------|:-----------------|:----------|:----------|:---------------------------------|
|`U+302E`   | Mark [Mn]        | _null_    | _null_    | &#x302E; Single Dot Tone Mark    |
|`U+302F`   | Mark [Mn]        | _null_    | _null_    | &#x302F; Double Dot Tone Mark    |
:::


Other important characters that may be encountered when shaping runs
of Hangul text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`), and zero-width non-joiner (`U+200C`).

The dotted-circle placeholder is frequently used when displaying a
mark in isolation. Real-world text may also use other characters, such
as hyphens or dashes, in a similar placeholder fashion; shaping
engines should cope with this situation gracefully.

The zero-width space (`U+200B`) or word joiner (`U+2060`) may be used
between two jamo to prevent them from being conjoined into a
syllable. The zero-width space allows a line break to happen between
the jamo, while the word joiner prevents the jamo from being separated
by a line break.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Jamo type | Composing | Glyph                            |
|:----------|:-----------------|:----------|:----------|:---------------------------------|
|`U+200B`   | Other            | _null_    | _null_    | &#x200B; Zero-width space        |
|`U+200C`   | Other            | _null_    | _null_    | &#x200C; Zero-width non-joiner   |
|`U+200D`   | Other            | _null_    | _null_    | &#x200D; Zero-width joiner       |
|`U+2060`   | Other            | _null_    | _null_    | &#x2060; Word joiner             |
|`U+25CC`   | Symbol           | _null_    | _null_    | &#x25CC; Dotted circle           |
:::


## Hangul Syllables character table ##

The Hangul Syllables block is too large to include a full character
table in this document.

Each syllable codepoint is classified either as type `LV` or type `LVT`,
indicating whether or not the syllable includes a trailing consonant
(jongseong) at the end.

Syllable codepoints are sorted in Hangul alphabetic order, first by
leading consonant (choseong), followed by vowel (jungseong), followed
by trailing consonant (jongseong).

This enables the algorithmic composition and decomposition of combining
jamo sequences and syllable codepoints.


:::{table} Hangul Syllables character table

| Codepoint | Unicode category | Syllable type | Glyph                            |
|:----------|:-----------------|:--------------|:---------------------------------|
|`U+AC00`   | Letter [Lo]      | LV            | &#xac00; G-A                     |
| | | | |
|`U+D5CC`   | Letter [Lo]      | LVT           | &#xd5cc; H-A-N                   |
:::


================================================
FILE: character-tables/character-tables-hebrew.md
================================================

# Hebrew character tables #

This document lists the per-character shaping information needed to
[shape Hebrew text](../opentype-shaping-hebrew.md).

**Contents**

Separate character tables are provided for the Hebrew block, the
Hebrew letters included in the Alphabetic Presentation Forms block,
and for other miscellaneous characters that are used in `<hebr>` text
runs:

  - [Hebrew character table](#hebrew-character-table)
  - [Alphabetic Presentation Forms character table](#alphabetic-presentation-forms-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)


The tables list each codepoint along with its Unicode general
category. For marks, the table lists the codepoint's mark combining
class. The codepoint's Unicode name and an example glyph are also provided.

Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 


## Hebrew character table ##


:::{table} Hebrew character table

| Codepoint | Unicode category | Mark class | Glyph                                |
|:----------|:-----------------|:-----------|:-------------------------------------|
| `U+0590`  | _unassigned_     |            |                                      |
| `U+0591`  | Mark [Mn]        | 220        | &#x0591; Accent Etnahta              |
| `U+0592`  | Mark [Mn]        | 230        | &#x0592; Accent Segol                |
| `U+0593`  | Mark [Mn]        | 230        | &#x0593; Accent Shalshelet           |
| `U+0594`  | Mark [Mn]        | 230        | &#x0594; Accent Zaqef Qatan          |
| `U+0595`  | Mark [Mn]        | 230        | &#x0595; Accent Zaqef Gadol          |
| `U+0596`  | Mark [Mn]        | 220        | &#x0596; Accent Tipeha               |
| `U+0597`  | Mark [Mn]        | 230        | &#x0597; Accent Revia                |
| `U+0598`  | Mark [Mn]        | 230        | &#x0598; Accent Zarqa                |
| `U+0599`  | Mark [Mn]        | 230        | &#x0599; Accent Pashta               |
| `U+059A`  | Mark [Mn]        | 222        | &#x059A; Accent Yetiv                |
| `U+059B`  | Mark [Mn]        | 220        | &#x059B; Accent Tevir                |
| `U+059C`  | Mark [Mn]        | 230        | &#x059C; Accent Geresh               |
| `U+059D`  | Mark [Mn]        | 230        | &#x059D; Accent Geresh Muqdam        |
| `U+059E`  | Mark [Mn]        | 230        | &#x059E; Accent Gershayim            |
| `U+059F`  | Mark [Mn]        | 230        | &#x059F; Accent Qarney Para          |
| | | | |
| `U+05A0`  | Mark [Mn]        | 230        | &#x05A0; Accent Telisha Gedola       |
| `U+05A1`  | Mark [Mn]        | 230        | &#x05A1; Accent Pazer                |
| `U+05A2`  | Mark [Mn]        | 220        | &#x05A2; Accent Atnah Hafukh         |
| `U+05A3`  | Mark [Mn]        | 220        | &#x05A3; Accent Munah                |
| `U+05A4`  | Mark [Mn]        | 220        | &#x05A4; Accent Mahapakh             |
| `U+05A5`  | Mark [Mn]        | 220        | &#x05A5; Accent Merkha               |
| `U+05A6`  | Mark [Mn]        | 220        | &#x05A6; Accent Merkha Kefula        |
| `U+05A7`  | Mark [Mn]        | 220        | &#x05A7; Accent Darga                |
| `U+05A8`  | Mark [Mn]        | 230        | &#x05A8; Accent Qadma                |
| `U+05A9`  | Mark [Mn]        | 230        | &#x05A9; Accent Telisha Qetana       |
| `U+05AA`  | Mark [Mn]        | 220        | &#x05AA; Accent Yerah Ben Yomo       |
| `U+05AB`  | Mark [Mn]        | 230        | &#x05AB; Accent Ole                  |
| `U+05AC`  | Mark [Mn]        | 230        | &#x05AC; Accent Iluy                 |
| `U+05AD`  | Mark [Mn]        | 222        | &#x05AD; Accent Dehi                 |
| `U+05AE`  | Mark [Mn]        | 228        | &#x05AE; Accent Zinor                |
| `U+05AF`  | Mark [Mn]        | 230        | &#x05AF; Mark Masora Circle          |
| | | | |
| `U+05B0`  | Mark [Mn]        | 10         | &#x05B0; Point Sheva                 |
| `U+05B1`  | Mark [Mn]        | 11         | &#x05B1; Point Hataf Segol           |
| `U+05B2`  | Mark [Mn]        | 12         | &#x05B2; Point Hataf Patah           |
| `U+05B3`  | Mark [Mn]        | 13         | &#x05B3; Point Hataf Qamats          |
| `U+05B4`  | Mark [Mn]        | 14         | &#x05B4; Point Hiriq                 |
| `U+05B5`  | Mark [Mn]        | 15         | &#x05B5; Point Tsere                 |
| `U+05B6`  | Mark [Mn]        | 16         | &#x05B6; Point Segol                 |
| `U+05B7`  | Mark [Mn]        | 17         | &#x05B7; Point Patah                 |
| `U+05B8`  | Mark [Mn]        | 18         | &#x05B8; Point Qamats                |
| `U+05B9`  | Mark [Mn]        | 19         | &#x05B9; Point Holam                 |
| `U+05BA`  | Mark [Mn]        | 19         | &#x05BA; Point Holam Haser For Vav   |
| `U+05BB`  | Mark [Mn]        | 20         | &#x05BB; Point Qubuts                |
| `U+05BC`  | Mark [Mn]        | 21         | &#x05BC; Point Dagesh Or Mapiq       |
| `U+05BD`  | Mark [Mn]        | 22         | &#x05BD; Point Meteg                 |
| `U+05BE`  | Punctuation Dash | _0_        | &#x05BE; Punctuation Maqaf           |
| `U+05BF`  | Mark [Mn]        | 23         | &#x05BF; Point Rafe                  |
| | | | |
| `U+05C0`  | Punctuation      | _0_        | &#x05C0; Punctuation Paseq           |
| `U+05C1`  | Mark [Mn]        | 24         | &#x05C1; Point Shin Dot              |
| `U+05C2`  | Mark [Mn]        | 25         | &#x05C2; Point Sin Dot               |
| `U+05C3`  | Punctuation      | _0_        | &#x05C3; Punctuation Sof Pasuq       |
| `U+05C4`  | Mark [Mn]        | 230        | &#x05C4; Mark Upper Dot              |
| `U+05C5`  | Mark [Mn]        | 220        | &#x05C5; Mark Lower Dot              |
| `U+05C6`  | Punctuation      | _0_        | &#x05C6; Punctuation Nun Hafukha     |
| `U+05C7`  | Mark [Mn]        | 18         | &#x05C7; Point Qamats Qatan          |
| `U+05C8`  | _unassigned_     |            |                                      |
| `U+05C9`  | _unassigned_     |            |                                      |
| `U+05CA`  | _unassigned_     |            |                                      |
| `U+05CB`  | _unassigned_     |            |                                      |
| `U+05CC`  | _unassigned_     |            |                                      |
| `U+05CD`  | _unassigned_     |            |                                      |
| `U+05CE`  | _unassigned_     |            |                                      |
| `U+05CF`  | _unassigned_     |            |                                      |
| | | | |
| `U+05D0`  | Letter           | _0_        | &#x05D0; Alef                        |
| `U+05D1`  | Letter           | _0_        | &#x05D1; Bet                         |
| `U+05D2`  | Letter           | _0_        | &#x05D2; Gimel                       |
| `U+05D3`  | Letter           | _0_        | &#x05D3; Dalet                       |
| `U+05D4`  | Letter           | _0_        | &#x05D4; He                          |
| `U+05D5`  | Letter           | _0_        | &#x05D5; Vav                         |
| `U+05D6`  | Letter           | _0_        | &#x05D6; Zayin                       |
| `U+05D7`  | Letter           | _0_        | &#x05D7; Het                         |
| `U+05D8`  | Letter           | _0_        | &#x05D8; Tet                         |
| `U+05D9`  | Letter           | _0_        | &#x05D9; Yod                         |
| `U+05DA`  | Letter           | _0_        | &#x05DA; Final Kaf                   |
| `U+05DB`  | Letter           | _0_        | &#x05DB; Kaf                         |
| `U+05DC`  | Letter           | _0_        | &#x05DC; Lamed                       |
| `U+05DD`  | Letter           | _0_        | &#x05DD; Final Mem                   |
| `U+05DE`  | Letter           | _0_        | &#x05DE; Mem                         |
| `U+05DF`  | Letter           | _0_        | &#x05DF; Final Nun                   |
| | | | |
| `U+05E0`  | Letter           | _0_        | &#x05E0; Nun                         |
| `U+05E1`  | Letter           | _0_        | &#x05E1; Samekh                      |
| `U+05E2`  | Letter           | _0_        | &#x05E2; Ayin                        |
| `U+05E3`  | Letter           | _0_        | &#x05E3; Final Pe                    |
| `U+05E4`  | Letter           | _0_        | &#x05E4; Pe                          |
| `U+05E5`  | Letter           | _0_        | &#x05E5; Final Tsadi                 |
| `U+05E6`  | Letter           | _0_        | &#x05E6; Tsadi                       |
| `U+05E7`  | Letter           | _0_        | &#x05E7; Qof                         |
| `U+05E8`  | Letter           | _0_        | &#x05E8; Resh                        |
| `U+05E9`  | Letter           | _0_        | &#x05E9; Shin                        |
| `U+05EA`  | Letter           | _0_        | &#x05EA; Tav                         |
| `U+05EB`  | _unassigned_     |            |                                      |
| `U+05EC`  | _unassigned_     |            |                                      |
| `U+05ED`  | _unassigned_     |            |                                      |
| `U+05EE`  | _unassigned_     |            |                                      |
| `U+05EF`  | Letter           | _0_        | &#x05EF; Yod Triangle                |
| | | | |
| `U+05F0`  | Letter           | _0_        | &#x05F0; Ligature Yiddish Double Vav |
| `U+05F1`  | Letter           | _0_        | &#x05F1; Ligature Yiddish Vav Yod    |
| `U+05F2`  | Letter           | _0_        | &#x05F2; Ligature Yiddish Double Yod |
| `U+05F3`  | Punctuation      | _0_        | &#x05F3; Punctuation Geresh          |
| `U+05F4`  | Punctuation      | _0_        | &#x05F4; Punctuation Gershayim       |
| `U+05F5`  | _unassigned_     |            |                                      |
| `U+05F6`  | _unassigned_     |            |                                      |
| `U+05F7`  | _unassigned_     |            |                                      |
| `U+05F8`  | _unassigned_     |            |                                      |
| `U+05F9`  | _unassigned_     |            |                                      |
| `U+05FA`  | _unassigned_     |            |                                      |
| `U+05FB`  | _unassigned_     |            |                                      |
| `U+05FC`  | _unassigned_     |            |                                      |
| `U+05FD`  | _unassigned_     |            |                                      |
| `U+05FE`  | _unassigned_     |            |                                      |
| `U+05FF`  | _unassigned_     |            |                                      |
:::


## Alphabetic Presentation Forms character table ##

This chart includes only the Hebrew codepoints from the Alphabetic
Presentation Forms block in Unicode.

The _Composition_ column lists the codepoints from the Hebrew block
that compose into the listed Alphabetic Presentation Form. These
presentation form compositions are not covered by the standard Unicode
composition algorithm.

Entries with a _null_ in this column do not need to be composed by the
shaping engine. 


:::{table} Alphabetic Presentation Forms character table

| Codepoint | Unicode category | Mark class | Composition     | Glyph                                   |
|:----------|:-----------------|:-----------|:----------------|:----------------------------------------|
| `U+FB1D`  | Letter           | _0_        |`U+05D9`,`U+05B4`| &#xFB1D; Yod With Hiriq                 |
| `U+FB1E`  | Mark [Mn]        | 26         | _null_          | &#xFB1E; Point Judeo-Spanish Varika     |
| `U+FB1F`  | Letter           | _0_        |`U+05F2`,`U+05B7`| &#xFB1F; Ligature Yiddish Yod Yod Patah |
| | | | | |
| `U+FB20`  | Letter           | _0_        | _null_          | &#xFB20; Alternative Ayin               |
| `U+FB21`  | Letter           | _0_        | _null_          | &#xFB21; Wide Alef                      |
| `U+FB22`  | Letter           | _0_        | _null_          | &#xFB22; Wide Dalet                     |
| `U+FB23`  | Letter           | _0_        | _null_          | &#xFB23; Wide He                        |
| `U+FB24`  | Letter           | _0_        | _null_          | &#xFB24; Wide Kaf                       |
| `U+FB25`  | Letter           | _0_        | _null_          | &#xFB25; Wide Lamed                     |
| `U+FB26`  | Letter           | _0_        | _null_          | &#xFB26; Wide Final Mem                 |
| `U+FB27`  | Letter           | _0_        | _null_          | &#xFB27; Wide Resh                      |
| `U+FB28`  | Letter           | _0_        | _null_          | &#xFB28; Wide Tav                       |
| `U+FB29`  | Symbol           | _0_        | _null_          | &#xFB29; Alternative Plus Sign          |
| `U+FB2A`  | Letter           | _0_        |`U+05E9`,`U+05C1`| &#xFB2A; Shin With Shin Dot             |
| `U+FB2B`  | Letter           | _0_        |`U+05E9`,`U+05C2`| &#xFB2B; Shin With Sin Dot              |
| `U+FB2C`  | Letter           | _0_        |`U+FB2A`,`U+05BC` OR `U+FB49`,`U+05C1`| &#xFB2C; Shin With Dagesh And Shin Dot  |
| `U+FB2D`  | Letter           | _0_        |`U+FB2B`,`U+05BC` OR `U+FB49`,`U+05C2`| &#xFB2D; Shin With Dagesh And Sin Dot   |
| `U+FB2E`  | Letter           | _0_        |`U+05D0`,`U+05B7`| &#xFB2E; Alef With Patah                |
| `U+FB2F`  | Letter           | _0_        |`U+05D0`,`U+05B8`| &#xFB2F; Alef With Qamats               |
| | | | | |
| `U+FB30`  | Letter           | _0_        |`U+05D0`,`U+05BC`| &#xFB30; Alef With Mapiq                |
| `U+FB31`  | Letter           | _0_        |`U+05D1`,`U+05BC`| &#xFB31; Bet With Dagesh                |
| `U+FB32`  | Letter           | _0_        |`U+05D2`,`U+05BC`| &#xFB32; Gimel With Dagesh              |
| `U+FB33`  | Letter           | _0_        |`U+05D3`,`U+05BC`| &#xFB33; Dalet With Dagesh              |
| `U+FB34`  | Letter           | _0_        |`U+05D4`,`U+05BC`| &#xFB34; He With Mapiq                  |
| `U+FB35`  | Letter           | _0_        |`U+05D5`,`U+05BC`| &#xFB35; Vav With Dagesh                |
| `U+FB36`  | Letter           | _0_        |`U+05D6`,`U+05BC`| &#xFB36; Zayin With Dagesh              |
| `U+FB37`  | _unassigned_     |            |                 |                                         |
| `U+FB38`  | Letter           | _0_        |`U+05D8`,`U+05BC`| &#xFB38; Tet With Dagesh                |
| `U+FB39`  | Letter           | _0_        |`U+05D9`,`U+05BC`| &#xFB39; Yod With Dagesh                |
| `U+FB3A`  | Letter           | _0_        |`U+05DA`,`U+05BC`| &#xFB3A; Final Kaf With Dagesh          |
| `U+FB3B`  | Letter           | _0_        |`U+05DB`,`U+05BC`| &#xFB3B; Kaf With Dagesh                |
| `U+FB3C`  | Letter           | _0_        |`U+05DC`,`U+05BC`| &#xFB3C; Lamed With Dagesh              |
| `U+FB3D`  | _unassigned_     |            |                 |                                         |
| `U+FB3E`  | Letter           | _0_        |`U+05DE`,`U+05BC`| &#xFB3E; Mem With Dagesh                |
| `U+FB3F`  | _unassigned_     |            |                 |                                         |
| | | | | |										   
| `U+FB40`  | Letter           | _0_        |`U+05E0`,`U+05BC`| &#xFB40; Nun With Dagesh                |
| `U+FB41`  | Letter           | _0_        |`U+05E1`,`U+05BC`| &#xFB41; Samekh With Dagesh             |
| `U+FB42`  | _unassigned_     |            |                 |                                         |
| `U+FB43`  | Letter           | _0_        |`U+05E3`,`U+05BC`| &#xFB43; Final Pe With Dagesh           |
| `U+FB44`  | Letter           | _0_        |`U+05E4`,`U+05BC`| &#xFB44; Pe With Dagesh                 |
| `U+FB45`  | _unassigned_     |            |                 |                                         |
| `U+FB46`  | Letter           | _0_        |`U+05E6`,`U+05BC`| &#xFB46; Tsadi With Dagesh              |
| `U+FB47`  | Letter           | _0_        |`U+05E7`,`U+05BC`| &#xFB47; Qof With Dagesh                |
| `U+FB48`  | Letter           | _0_        |`U+05E8`,`U+05BC`| &#xFB48; Resh With Dagesh               |
| `U+FB49`  | Letter           | _0_        |`U+05E9`,`U+05BC`| &#xFB49; Shin With Dagesh               |
| `U+FB4A`  | Letter           | _0_        |`U+05EA`,`U+05BC`| &#xFB4A; Tav With Dagesh                |
| `U+FB4B`  | Letter           | _0_        |`U+05D5`,`U+05B9`| &#xFB4B; Vav With Holam                 |
| `U+FB4C`  | Letter           | _0_        |`U+05D1`,`U+05BF`| &#xFB4C; Bet With Rafe                  |
| `U+FB4D`  | Letter           | _0_        |`U+05DB`,`U+05BF`| &#xFB4D; Kaf With Rafe                  |
| `U+FB4E`  | Letter           | _0_        |`U+05E4`,`U+05BF`| &#xFB4E; Pe With Rafe                   |
| `U+FB4F`  | Letter           | _0_        | _null_          | &#xFB4F; Ligature Alef Lamed            |
:::


## Miscellaneous character table ##

Other important characters that may be encountered when shaping runs
of Hebrew text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`), and zero-width non-joiner (`U+200C`).

The dotted-circle placeholder is frequently used when displaying a
mark in isolation. Real-world text may also use other characters, such
as hyphens or dashes, in a similar placeholder fashion; shaping
engines should cope with this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Mark class | Glyph                              |
|:----------|:-----------------|:-----------|:-----------------------------------|
|`U+00A0`   | Separator        | _0_        | &#x00A0; No-break space            |
|`U+034F`   | Mark [Mn]        | _0_        | &#x034F; Combining grapheme joiner |
|`U+200C`   | Other            | _0_        | &#x200C; Zero-width non-joiner     |
|`U+200D`   | Other            | _0_        | &#x200D; Zero-width joiner         |
|`U+200E`   | Other            | _0_        | &#x200E; Left-to-Right mark        |
|`U+200F`   | Other            | _0_        | &#x200F; Right-to-Left mark        |
|`U+25CC`   | Symbol           | _0_        | &#x25CC; Dotted circle             |
:::


================================================
FILE: character-tables/character-tables-kannada.md
================================================
# Kannada character tables #

This document lists the per-character shaping information needed to
[shape Kannada text](../opentype-shaping-kannada.md).

**Contents**

  - [Kannada character table](#kannada-character-table)
  - [Vedic Extensions character table](#vedic-extensions-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)
	  

## Kannada character table ##

Kannada glyphs should be classified as in the following
table. Codepoints in the Kannada block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as currency marks,
punctuation, and other symbols.

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


:::{table} Kannada character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0C80`   | Letter           | PLACEHOLDER       | _null_                     | &#x0C80; Spacing Candrabindu |
|`U+0C81`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0C81; Candrabindu         |
|`U+0C82`   | Mark [Mc]        | BINDU             | RIGHT_POSITION             | &#x0C82; Anusvara            |
|`U+0C83`   | Mark [Mc]        | VISARGA           | RIGHT_POSITION             | &#x0C83; Visarga             |
|`U+0C84`   | Punctuation      | _null_            | _null_                     | &#x0C84; Siddham             |
|`U+0C85`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C85; A                   |
|`U+0C86`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C86; Aa                  |
|`U+0C87`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C87; I                   |
|`U+0C88`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C88; Ii                  |
|`U+0C89`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C89; U                   |
|`U+0C8A`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C8A; Uu                  |
|`U+0C8B`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C8B; Vocalic R           |
|`U+0C8C`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C8C; Vocalic L           |
|`U+0C8D`   | _unassigned_     |                   |                            |                              |
|`U+0C8E`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C8E; E                   |
|`U+0C8F`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C8F; Ee                  |
| | | | |																		
|`U+0C90`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C90; Ai                  |
|`U+0C91`   | _unassigned_     |                   |                            |                              |
|`U+0C92`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C92; O                   |
|`U+0C93`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C93; Oo                  |
|`U+0C94`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C94; Au                  |
|`U+0C95`   | Letter           | CONSONANT         | _null_                     | &#x0C95; Ka                  |
|`U+0C96`   | Letter           | CONSONANT         | _null_                     | &#x0C96; Kha                 |
|`U+0C97`   | Letter           | CONSONANT         | _null_                     | &#x0C97; Ga                  |
|`U+0C98`   | Letter           | CONSONANT         | _null_                     | &#x0C98; Gha                 |
|`U+0C99`   | Letter           | CONSONANT         | _null_                     | &#x0C99; Nga                 |
|`U+0C9A`   | Letter           | CONSONANT         | _null_                     | &#x0C9A; Ca                  |
|`U+0C9B`   | Letter           | CONSONANT         | _null_                     | &#x0C9B; Cha                 |
|`U+0C9C`   | Letter           | CONSONANT         | _null_                     | &#x0C9C; Ja                  |
|`U+0C9D`   | Letter           | CONSONANT         | _null_                     | &#x0C9D; Jha                 |
|`U+0C9E`   | Letter           | CONSONANT         | _null_                     | &#x0C9E; Nya                 |
|`U+0C9F`   | Letter           | CONSONANT         | _null_                     | &#x0C9F; Tta                 |
| | | | |																		
|`U+0CA0`   | Letter           | CONSONANT         | _null_                     | &#x0CA0; Ttha                |
|`U+0CA1`   | Letter           | CONSONANT         | _null_                     | &#x0CA1; Dda                 |
|`U+0CA2`   | Letter           | CONSONANT         | _null_                     | &#x0CA2; Ddha                |
|`U+0CA3`   | Letter           | CONSONANT         | _null_                     | &#x0CA3; Nna                 |
|`U+0CA4`   | Letter           | CONSONANT         | _null_                     | &#x0CA4; Ta                  |
|`U+0CA5`   | Letter           | CONSONANT         | _null_                     | &#x0CA5; Tha                 |
|`U+0CA6`   | Letter           | CONSONANT         | _null_                     | &#x0CA6; Da                  |
|`U+0CA7`   | Letter           | CONSONANT         | _null_                     | &#x0CA7; Dha                 |
|`U+0CA8`   | Letter           | CONSONANT         | _null_                     | &#x0CA8; Na                  |
|`U+0CA9`   | _unassigned_     |                   |                            |                              |
|`U+0CAA`   | Letter           | CONSONANT         | _null_                     | &#x0CAA; Pa                  |
|`U+0CAB`   | Letter           | CONSONANT         | _null_                     | &#x0CAB; Pha                 |
|`U+0CAC`   | Letter           | CONSONANT         | _null_                     | &#x0CAC; Ba                  |
|`U+0CAD`   | Letter           | CONSONANT         | _null_                     | &#x0CAD; Bha                 |
|`U+0CAE`   | Letter           | CONSONANT         | _null_                     | &#x0CAE; Ma                  |
|`U+0CAF`   | Letter           | CONSONANT         | _null_                     | &#x0CAF; Ya                  |
| | | | |																		
|`U+0CB0`   | Letter           | CONSONANT         | _null_                     | &#x0CB0; Ra                  |
|`U+0CB1`   | Letter           | CONSONANT         | _null_                     | &#x0CB1; Rra                 |
|`U+0CB2`   | Letter           | CONSONANT         | _null_                     | &#x0CB2; La                  |
|`U+0CB3`   | Letter           | CONSONANT         | _null_                     | &#x0CB3; Lla                 |
|`U+0CB4`   | _unassigned_     |                   |                            |                              |
|`U+0CB5`   | Letter           | CONSONANT         | _null_                     | &#x0CB5; Va                  |
|`U+0CB6`   | Letter           | CONSONANT         | _null_                     | &#x0CB6; Sha                 |
|`U+0CB7`   | Letter           | CONSONANT         | _null_                     | &#x0CB7; Ssa                 |
|`U+0CB8`   | Letter           | CONSONANT         | _null_                     | &#x0CB8; Sa                  |
|`U+0CB9`   | Letter           | CONSONANT         | _null_                     | &#x0CB9; Ha                  |
|`U+0CBA`   | _unassigned_     |                   |                            |                              |
|`U+0CBB`   | _unassigned_     |                   |                            |                              |
|`U+0CBC`   | Mark [Mn]        | NUKTA             | BOTTOM_POSITION            | &#x0CBC; Nukta               |
|`U+0CBD`   | Letter           | AVAGRAHA          | _null_                     | &#x0CBD; Avagraha            |
|`U+0CBE`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0CBE; Sign Aa             |
|`U+0CBF`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0CBF; Sign I              |
| | | | |																		
|`U+0CC0`   | Mark [Mc]        | VOWEL_DEPENDENT   | TOP_AND_RIGHT_POSITION     | &#x0CC0; Sign Ii             |
|`U+0CC1`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0CC1; Sign U              |
|`U+0CC2`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0CC2; Sign Uu             |
|`U+0CC3`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0CC3; Sign Vocalic R      |
|`U+0CC4`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0CC4; Sign Vocalic Rr     |
|`U+0CC5`   | _unassigned_     |                   |                            |                              |
|`U+0CC6`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0CC6; Sign E              |
|`U+0CC7`   | Mark [Mc]        | VOWEL_DEPENDENT   | TOP_AND_RIGHT_POSITION     | &#x0CC7; Sign Ee             |
|`U+0CC8`   | Mark [Mc]        | VOWEL_DEPENDENT   | TOP_AND_RIGHT_POSITION     | &#x0CC8; Sign Ai             |
|`U+0CC9`   | _unassigned_     |                   |                            |                              |
|`U+0CCA`   | Mark [Mc]        | VOWEL_DEPENDENT   | TOP_AND_RIGHT_POSITION     | &#x0CCA; Sign O              |
|`U+0CCB`   | Mark [Mc]        | VOWEL_DEPENDENT   | TOP_AND_RIGHT_POSITION     | &#x0CCB; Sign Oo             |
|`U+0CCC`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0CCC; Sign Au             |
|`U+0CCD`   | Mark [Mn]        | VIRAMA            | TOP_POSITION               | &#x0CCD; Virama              |
|`U+0CCE`   | _unassigned_     |                   |                            |                              |
|`U+0CCF`   | _unassigned_     |                   |                            |                              |
| | | | |																		
|`U+0CD0`   | _unassigned_     |                   |                            |                              |
|`U+0CD1`   | _unassigned_     |                   |                            |                              |
|`U+0CD2`   | _unassigned_     |                   |                            |                              |
|`U+0CD3`   | _unassigned_     |                   |                            |                              |
|`U+0CD4`   | _unassigned_     |                   |                            |                              |
|`U+0CD5`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0CD5; Length Mark         |
|`U+0CD6`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0CD6; Ai Length Mark      |
|`U+0CD7`   | _unassigned_     |                   |                            |                              |
|`U+0CD8`   | _unassigned_     |                   |                            |                              |
|`U+0CD9`   | _unassigned_     |                   |                            |                              |
|`U+0CDA`   | _unassigned_     |                   |                            |                              |
|`U+0CDB`   | _unassigned_     |                   |                            |                              |
|`U+0CDC`   | _unassigned_     |                   |                            |                              |
|`U+0CDD`   | _unassigned_     |                   |                            |                              |
|`U+0CDE`   | Letter           | CONSONANT         | _null_                     | &#x0CDE; Fa                  |
|`U+0CDF`   | _unassigned_     |                   |                            |                              |
| | | | |																		
|`U+0CE0`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0CE0; Vocalic Rr          |
|`U+0CE1`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0CE1; Vocalic Ll          |
|`U+0CE2`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0CE2; Sign Vocalic L      |
|`U+0CE3`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0CE3; Sign Vocalic Ll     |
|`U+0CE4`   | _unassigned_     |                   |                            |                              |
|`U+0CE5`   | _unassigned_     |                   |                            |                              |
|`U+0CE6`   | Number           | NUMBER            | _null_                     | &#x0CE6; Digit Zero          |
|`U+0CE7`   | Number           | NUMBER            | _null_                     | &#x0CE7; Digit One           |
|`U+0CE8`   | Number           | NUMBER            | _null_                     | &#x0CE8; Digit Two           |
|`U+0CE9`   | Number           | NUMBER            | _null_                     | &#x0CE9; Digit Three         |
|`U+0CEA`   | Number           | NUMBER            | _null_                     | &#x0CEA; Digit Four          |
|`U+0CEB`   | Number           | NUMBER            | _null_                     | &#x0CEB; Digit Five          |
|`U+0CEC`   | Number           | NUMBER            | _null_                     | &#x0CEC; Digit Six           |
|`U+0CED`   | Number           | NUMBER            | _null_                     | &#x0CED; Digit Seven         |
|`U+0CEE`   | Number           | NUMBER            | _null_                     | &#x0CEE; Digit Eight         |
|`U+0CEF`   | Number           | NUMBER            | _null_                     | &#x0CEF; Digit Nine          |
| | | | |																		
|`U+0CF0`   | _unassigned_     |                   |                            |                              |
|`U+0CF1`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x0CF1; Jihvamuliya         |
|`U+0CF2`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x0CF2; Upadhmaniya         |
|`U+0CF3`   | Mark [Mc]        | BINDU             | RIGHT_POSITION             | &#x0CF3; Combining Anusvara Above Right|
|`U+0CF4`   | _unassigned_     |                   |                            |                              |
|`U+0CF5`   | _unassigned_     |                   |                            |                              |
|`U+0CF6`   | _unassigned_     |                   |                            |                              |
|`U+0CF7`   | _unassigned_     |                   |                            |                              |
|`U+0CF8`   | _unassigned_     |                   |                            |                              |
|`U+0CF9`   | _unassigned_     |                   |                            |                              |
|`U+0CFA`   | _unassigned_     |                   |                            |                              |
|`U+0CFB`   | _unassigned_     |                   |                            |                              |
|`U+0CFC`   | _unassigned_     |                   |                            |                              |
|`U+0CFD`   | _unassigned_     |                   |                            |                              |
|`U+0CFE`   | _unassigned_     |                   |                            |                              |
|`U+0CFF`   | _unassigned_     |                   |                            |                              |
:::


## Vedic Extensions character table ##

Sanskrit runs written in the Kannada script may also include
characters from the Vedic Extensions block. These characters should be
classified as follows.

> Note: See the [Vedic Extensions](../opentype-shaping-vedic-extensions.md) 
> document for additional information.


:::{table} Vedic Extensions character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1CD0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD0; Tone Karshana       |
|`U+1CD1`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD1; Tone Shara          |
|`U+1CD2`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD2; Tone Prenkha        |
|`U+1CD3`   | Punctuation      | _null_            | _null_                     | &#x1CD3; Sign Nihshvasa      |
|`U+1CD4`   | Mark [Mn]        | CANTILLATION      | OVERSTRUCK                 | &#x1CD4; Tone Midline Svarita |
|`U+1CD5`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD5; Tone Aggravated Independent Svarita |
|`U+1CD6`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD6; Tone Independent Svarita |
|`U+1CD7`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD7; Tone Kathaka Independent Svarita |
|`U+1CD8`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD8; Tone Candra Below   |
|`U+1CD9`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD9; Tone Kathaka Independent Svarita Schroeder |
|`U+1CDA`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDA; Tone Double Svarita |
|`U+1CDB`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDB; Tone Triple Svarita |
|`U+1CDC`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDC; Tone Kathaka Anudatta |
|`U+1CDD`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDD; Tone Dot Below      |
|`U+1CDE`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDE; Tone Two Dots Below |
|`U+1CDF`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDF; Tone Three Dots Below |
| | | | |																		
|`U+1CE0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CE0; Tone Rigvedic Kashmiri Independent Svarita |
|`U+1CE1`   | Mark [Mc]        | CANTILLATION      | RIGHT_POSITION             | &#x1CE1; Tone Atharvavedic Independent Svarita |
|`U+1CE2`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE2; Sign Visarga Svarita |
|`U+1CE3`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE3; Sign Visarga Udatta |
|`U+1CE4`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE4; Sign Reversed Visarga Udatta |
|`U+1CE5`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE5; Sign Visarga Anudatta |
|`U+1CE6`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE6; Sign Reversed Visarga Anudatta |
|`U+1CE7`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE7; Sign Visarga Udatta With Tail |
|`U+1CE8`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE8; Sign Visarga Anudatta With Tail |
|`U+1CE9`   | Letter           | SYMBOL            | _null_                     | &#x1CE9; Sign Anusvara Antargomukha |
|`U+1CEA`   | Letter           | _null_            | _null_                     | &#x1CEA; Sign Anusvara Bahirgomukha |
|`U+1CEB`   | Letter           | _null_            | _null_                     | &#x1CEB; Sign Anusvara Vamagomukha |
|`U+1CEC`   | Letter           | SYMBOL            | _null_                     | &#x1CEC; Sign Anusvara Vamagomukha With Tail |
|`U+1CED`   | Mark [Mn]        | AVAGRAHA          | BOTTOM_POSITION            | &#x1CED; Sign Tiryak         |
|`U+1CEE`   | Letter           | SYMBOL            | _null_                     | &#x1CEE; Sign Hexiform Long Anusvara |
|`U+1CEF`   | Letter           | _null_            | _null_                     | &#x1CEF; Sign Long Anusvara  |
| | | | |																		
|`U+1CF0`   | Letter           | _null_            | _null_                     | &#x1CF0; Sign Rthang Long Anusvara |
|`U+1CF1`   | Letter           | SYMBOL            | _null_                     | &#x1CF1; Sign Anusvara Ubhayato Mukha |
|`U+1CF2`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF2; Sign Ardhavisarga   |
|`U+1CF3`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF3; Sign Rotated Ardhavisarga |
|`U+1CF4`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CF4; Tone Candra Above   |
|`U+1CF5`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF5; Sign Jihvamuliya    |
|`U+1CF6`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF6; Sign Upadhmaniya    |
|`U+1CF7`   | Mark [Mc]        | _null_            | _null_                     | &#x1CF7; Sign Atikrama       |
|`U+1CF8`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF8; Tone Ring Above     |
|`U+1CF9`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF9; Tone Double Ring Above |
|`U+1CFA`   | Letter           | PLACEHOLDER       | _null_                     | &#x1CFA; Sign Double Anusvara Antargomukha |
|`U+1CFB`   | _unassigned_     |                   |                            |                              |
|`U+1CFC`   | _unassigned_     |                   |                            |                              |
|`U+1CFD`   | _unassigned_     |                   |                            |                              |
|`U+1CFE`   | _unassigned_     |                   |                            |                              |
|`U+1CFF`   | _unassigned_     |                   |                            |                              |
:::


## Miscellaneous character table ##

In addition to general punctuation, runs of Kannada text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block. Kannada text can also incorporate the udatta
(`U+0951`) and anudatta (`U+0952`) signs from the Devanagari block.


:::{table} Additional punctuation character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+0951`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x0951; Udatta              |
|`U+0952`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x0952; Anudatta            |
|`U+0964`   | Punctuation      | _null_            | _null_                     | &#x0964; Danda               |
|`U+0965`   | Punctuation      | _null_            | _null_                     | &#x0965; Double Danda        |
:::


Other important characters that may be encountered when shaping runs
of Kannada text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2013; En dash               |
|`U+2014`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
:::


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a "_Consonant_,Halant,_Consonant_" sequence. The
sequence "_Consonant_,Halant,ZWJ,_Consonant_" blocks the formation of
a conjunct between the two consonants. 

Note, however, that the "_Consonant_,Halant" subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead. The
sequence "_Consonant_,Halant,ZWNJ,_Consonant_" should produce the
first consonant in its standard form, followed by an explicit
"Halant".

A secondary usage of the zero-width joiner is to prevent the formation of
"Reph". An initial "Ra,Halant,ZWJ" sequence should not produce a "Reph",
where an initial "Ra,Halant" sequence without the zero-width joiner
otherwise would.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match "NBSP,ZWJ,Halant,_Consonant_", "NBSP,_mark_", or "NBSP,_matra_".


================================================
FILE: character-tables/character-tables-khmer.md
================================================
# Khmer character tables #

This document lists the per-character shaping information needed to
[shape Khmer text](../opentype-shaping-khmer.md).

**Contents**

  - [Khmer character table](#khmer-character-table)
  - [Khmer Symbols character table](#khmer-symbols-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)
	  

## Khmer character table ##

Khmer glyphs should be classified as in the following
table. Codepoints in the Khmer block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as currency marks,
punctuation, and other symbols.

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


:::{table} Khmer character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1780`   | Letter           | CONSONANT         | _null_                     | &#x1780; Ka                  |
|`U+1781`   | Letter           | CONSONANT         | _null_                     | &#x1781; Kha                 |
|`U+1782`   | Letter           | CONSONANT         | _null_                     | &#x1782; Ko                  |
|`U+1783`   | Letter           | CONSONANT         | _null_                     | &#x1783; Kho                 |
|`U+1784`   | Letter           | CONSONANT         | _null_                     | &#x1784; Ngo                 |
|`U+1785`   | Letter           | CONSONANT         | _null_                     | &#x1785; Ca                  |
|`U+1786`   | Letter           | CONSONANT         | _null_                     | &#x1786; Cha                 |
|`U+1787`   | Letter           | CONSONANT         | _null_                     | &#x1787; Co                  |
|`U+1788`   | Letter           | CONSONANT         | _null_                     | &#x1788; Cho                 |
|`U+1789`   | Letter           | CONSONANT         | _null_                     | &#x1789; Nyo                 |
|`U+178A`   | Letter           | CONSONANT         | _null_                     | &#x178A; Da                  |
|`U+178B`   | Letter           | CONSONANT         | _null_                     | &#x178B; Ttha                |
|`U+178C`   | Letter           | CONSONANT         | _null_                     | &#x178C; Do                  |
|`U+178D`   | Letter           | CONSONANT         | _null_                     | &#x178D; Ttho                |
|`U+178E`   | Letter           | CONSONANT         | _null_                     | &#x178E; Nno                 |
|`U+178F`   | Letter           | CONSONANT         | _null_                     | &#x178F; Ta                  |
| | | | |																	   
|`U+1790`   | Letter           | CONSONANT         | _null_                     | &#x1790; Tha                 |
|`U+1791`   | Letter           | CONSONANT         | _null_                     | &#x1791; To                  |
|`U+1792`   | Letter           | CONSONANT         | _null_                     | &#x1792; Tho                 |
|`U+1793`   | Letter           | CONSONANT         | _null_                     | &#x1793; No                  |
|`U+1794`   | Letter           | CONSONANT         | _null_                     | &#x1794; Ba                  |
|`U+1795`   | Letter           | CONSONANT         | _null_                     | &#x1795; Pha                 |
|`U+1796`   | Letter           | CONSONANT         | _null_                     | &#x1796; Po                  |
|`U+1797`   | Letter           | CONSONANT         | _null_                     | &#x1797; Pho                 |
|`U+1798`   | Letter           | CONSONANT         | _null_                     | &#x1798; Mo                  |
|`U+1799`   | Letter           | CONSONANT         | _null_                     | &#x1799; Yo                  |
|`U+179A`   | Letter           | CONSONANT         | _null_                     | &#x179A; Ro                  |
|`U+179B`   | Letter           | CONSONANT         | _null_                     | &#x179B; Lo                  |
|`U+179C`   | Letter           | CONSONANT         | _null_                     | &#x179C; Vo                  |
|`U+179D`   | Letter           | CONSONANT         | _null_                     | &#x179D; Sha                 |
|`U+179E`   | Letter           | CONSONANT         | _null_                     | &#x179E; Sso                 |
|`U+179F`   | Letter           | CONSONANT         | _null_                     | &#x179F; Sa                  |
| | | | |																	   
|`U+17A0`   | Letter           | CONSONANT         | _null_                     | &#x17A0; Ha                  |
|`U+17A1`   | Letter           | CONSONANT         | _null_                     | &#x17A1; La                  |
|`U+17A2`   | Letter           | CONSONANT         | _null_                     | &#x17A2; Qa                  |
|`U+17A3`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17A3; Qaq                 |
|`U+17A4`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17A4; Qaa                 |
|`U+17A5`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17A5; Qi                  |
|`U+17A6`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17A6; Qii                 |
|`U+17A7`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17A7; Qu                  |
|`U+17A8`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17A8; Quk                 |
|`U+17A9`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17A9; Quu                 |
|`U+17AA`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17AA; Quuv                |
|`U+17AB`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17AB; Ry                  |
|`U+17AC`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17AC; Ryy                 |
|`U+17AD`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17AD; Ly                  |
|`U+17AE`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17AE; Lyy                 |
|`U+17AF`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17AF; Qe                  |
| | | | | |
|`U+17B0`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17B0; Qai                 |
|`U+17B1`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17B1; Qoo Type One        |
|`U+17B2`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17B2; Qoo Type Two        |
|`U+17B3`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x17B3; Qau                 |
|`U+17B4`   | Mark [Mn]        | _null_            | _null_                     | &#x17B4; Inherent Aq         |
|`U+17B5`   | Mark [Mn]        | _null_            | _null_                     | &#x17B5; Inherent Aa         |
|`U+17B6`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x17B6; Sign Aa             |
|`U+17B7`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x17B7; Sign I              |
|`U+17B8`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x17B8; Sign Ii             |
|`U+17B9`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x17B9; Sign Y              |
|`U+17BA`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x17BA; Sign Yy             |
|`U+17BB`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x17BB; Sign U              |
|`U+17BC`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x17BC; Sign Uu             |
|`U+17BD`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x17BD; Sign Ua             |
|`U+17BE`   | Mark [Mc]        | VOWEL_DEPENDENT   | TOP_AND_LEFT_POSITION      | &#x17BE; Sign Oe             |
|`U+17BF`   | Mark [Mc]        | VOWEL_DEPENDENT   | TOP_LEFT_AND_RIGHT_POSITION| &#x17BF; Sign Ya             |
| | | | | |
|`U+17C0`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_AND_RIGHT_POSITION    | &#x17C0; Sign Ie             |
|`U+17C1`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x17C1; Sign E              |
|`U+17C2`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x17C2; Sign Ae             |
|`U+17C3`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x17C3; Sign Ai             |
|`U+17C4`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_AND_RIGHT_POSITION    | &#x17C4; Sign Oo             |
|`U+17C5`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_AND_RIGHT_POSITION    | &#x17C5; Sign Au             |
|`U+17C6`   | Mark [Mn]        | NUKTA             | TOP_POSITION               | &#x17C6; Nikahit             |
|`U+17C7`   | Mark [Mc]        | VISARGA           | RIGHT_POSITION             | &#x17C7; Reahmuk             |
|`U+17C8`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x17C8; Yuukaleapintu       |
|`U+17C9`   | Mark [Mn]        | REGISTER_SHIFTER  | TOP_POSITION               | &#x17C9; Muusikatoan         |
|`U+17CA`   | Mark [Mn]        | REGISTER_SHIFTER  | TOP_POSITION               | &#x17CA; Triisap             |
|`U+17CB`   | Mark [Mn]        | SYLLABLE_MODIFIER | TOP_POSITION               | &#x17CB; Bantoc              |
|`U+17CC`   | Mark [Mn]        | CONSONANT_POST_REPHA| TOP_POSITION             | &#x17CC; Robat               |
|`U+17CD`   | Mark [Mn]        | CONSONANT_KILLER  | TOP_POSITION               | &#x17CD; Toandakhiat         |
|`U+17CE`   | Mark [Mn]        | SYLLABLE_MODIFIER | TOP_POSITION               | &#x17CE; Kakabat             |
|`U+17CF`   | Mark [Mn]        | SYLLABLE_MODIFIER | TOP_POSITION               | &#x17CF; Ahsda               |
| | | | | |
|`U+17D0`   | Mark [Mn]        | SYLLABLE_MODIFIER | TOP_POSITION               | &#x17D0; Samyok Sannya       |
|`U+17D1`   | Mark [Mn]        | PURE_KILLER       | TOP_POSITION               | &#x17D1; Viriam              |
|`U+17D2`   | Mark [Mn]        | INVISIBLE_STACKER | _null_                     | &#x17D2; Sign Coeng          |
|`U+17D3`   | Mark [Mn]        | SYLLABLE_MODIFIER | TOP_POSITION               | &#x17D3; Bathamasat          |
|`U+17D4`   | Punctuation      | _null_            | _null_                     | &#x17D4; Khan                |
|`U+17D5`   | Punctuation      | _null_            | _null_                     | &#x17D5; Bariyoosan          |
|`U+17D6`   | Punctuation      | _null_            | _null_                     | &#x17D6; Camnuc Pii Kuuh     |
|`U+17D7`   | Letter           | _null_            | _null_                     | &#x17D7; Lek Too             |
|`U+17D8`   | Punctuation      | _null_            | _null_                     | &#x17D8; Beyyal              |
|`U+17D9`   | Punctuation      | _null_            | _null_                     | &#x17D9; Phnaek Muan         |
|`U+17DA`   | Punctuation      | _null_            | _null_                     | &#x17DA; Koomuut             |
|`U+17DB`   | Symbol           | SYMBOL            | _null_                     | &#x17DB; Riel                |
|`U+17DC`   | Letter           | AVAGRAHA          | _null_                     | &#x17DC; Avakrahasanya       |
|`U+17DD`   | Mark [Mn]        | SYLLABLE_MODIFIER | TOP_POSITION               | &#x17DD; Atthacan            |
|`U+17DE`   | _unassigned_     |                   |                            |                              |
|`U+17DF`   | _unassigned_     |                   |                            |                              |
| | | | | |
|`U+17E0`   | Number           | NUMBER            | _null_                     | &#x17E0; Digit Zero          |
|`U+17E1`   | Number           | NUMBER            | _null_                     | &#x17E1; Digit One           |
|`U+17E2`   | Number           | NUMBER            | _null_                     | &#x17E2; Digit Two           |
|`U+17E3`   | Number           | NUMBER            | _null_                     | &#x17E3; Digit Three         |
|`U+17E4`   | Number           | NUMBER            | _null_                     | &#x17E4; Digit Four          |
|`U+17E5`   | Number           | NUMBER            | _null_                     | &#x17E5; Digit Five          |
|`U+17E6`   | Number           | NUMBER            | _null_                     | &#x17E6; Digit Six           |
|`U+17E7`   | Number           | NUMBER            | _null_                     | &#x17E7; Digit Seven         |
|`U+17E8`   | Number           | NUMBER            | _null_                     | &#x17E8; Digit Eight         |
|`U+17E9`   | Number           | NUMBER            | _null_                     | &#x17E9; Digit Nine          |
|`U+17EA`   | _unassigned_     |                   |                            |                              |
|`U+17EB`   | _unassigned_     |                   |                            |                              |
|`U+17EC`   | _unassigned_     |                   |                            |                              |
|`U+17ED`   | _unassigned_     |                   |                            |                              |
|`U+17EE`   | _unassigned_     |                   |                            |                              |
|`U+17EF`   | _unassigned_     |                   |                            |                              |
| | | | | |
|`U+17F0`   | Number           | _null_            | _null_                     | &#x17F0; Lek Attak Son       |
|`U+17F1`   | Number           | _null_            | _null_                     | &#x17F1; Lek Attak Muoy      |
|`U+17F2`   | Number           | _null_            | _null_                     | &#x17F2; Lek Attak Pii       |
|`U+17F3`   | Number           | _null_            | _null_                     | &#x17F3; Lek Attak Bei       |
|`U+17F4`   | Number           | _null_            | _null_                     | &#x17F4; Lek Attak Buon      |
|`U+17F5`   | Number           | _null_            | _null_                     | &#x17F5; Lek Attak Pram      |
|`U+17F6`   | Number           | _null_            | _null_                     | &#x17F6; Lek Attak Pram-Muoy |
|`U+17F7`   | Number           | _null_            | _null_                     | &#x17F7; Lek Attak Pram-Pii  |
|`U+17F8`   | Number           | _null_            | _null_                     | &#x17F8; Lek Attak Pram-Bei  |
|`U+17F9`   | Number           | _null_            | _null_                     | &#x17F9; Lek Attak Pram-Buon |
|`U+17FA`   | _unassigned_     |                   |                            |                              |
|`U+17FB`   | _unassigned_     |                   |                            |                              |
|`U+17FC`   | _unassigned_     |                   |                            |                              |
|`U+17FD`   | _unassigned_     |                   |                            |                              |
|`U+17FE`   | _unassigned_     |                   |                            |                              |
|`U+17FF`   | _unassigned_     |                   |                            |                              |
:::


## Khmer Symbols character table ##

The Khmer Symbols block contains miscellaneous symbols used for
lunar-date calendars. None evoke any special behavior from the shaping engine.


:::{table} Khmer Symbols character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+19E0`   | Symbol           | _null_            | _null_                     | &#x19E0; Pathamasat          |
|`U+19E1`   | Symbol           | _null_            | _null_                     | &#x19E1; Muoy Koet           |
|`U+19E2`   | Symbol           | _null_            | _null_                     | &#x19E2; Pii Koet            |
|`U+19E3`   | Symbol           | _null_            | _null_                     | &#x19E3; Bei Koet            |
|`U+19E4`   | Symbol           | _null_            | _null_                     | &#x19E4; Buon Koet           |
|`U+19E5`   | Symbol           | _null_            | _null_                     | &#x19E5; Pram Koet           |
|`U+19E6`   | Symbol           | _null_            | _null_                     | &#x19E6; Pram-Muoy Koet      |
|`U+19E7`   | Symbol           | _null_            | _null_                     | &#x19E7; Pram-Pii Koet       |
|`U+19E8`   | Symbol           | _null_            | _null_                     | &#x19E8; Pram-Bei Koet       |
|`U+19E9`   | Symbol           | _null_            | _null_                     | &#x19E9; Pram-Buon Koet      |
|`U+19EA`   | Symbol           | _null_            | _null_                     | &#x19EA; Dap Koet            |
|`U+19EB`   | Symbol           | _null_            | _null_                     | &#x19EB; Dap-Muoy Koet       |
|`U+19EC`   | Symbol           | _null_            | _null_                     | &#x19EC; Dap-Pii Koet        |
|`U+19ED`   | Symbol           | _null_            | _null_                     | &#x19ED; Dap-Bei Koet        |
|`U+19EE`   | Symbol           | _null_            | _null_                     | &#x19EE; Dap-Buon Koet       |
|`U+19EF`   | Symbol           | _null_            | _null_                     | &#x19EF; Dap-Pram Koet       |
| | | | | |
|`U+19F0`   | Symbol           | _null_            | _null_                     | &#x19F0; Tuteyasat           |
|`U+19F1`   | Symbol           | _null_            | _null_                     | &#x19F1; Muoy Roc            |
|`U+19F2`   | Symbol           | _null_            | _null_                     | &#x19F2; Pii Roc             |
|`U+19F3`   | Symbol           | _null_            | _null_                     | &#x19F3; Bei Roc             |
|`U+19F4`   | Symbol           | _null_            | _null_                     | &#x19F4; Buon Roc            |
|`U+19F5`   | Symbol           | _null_            | _null_                     | &#x19F5; Pram Roc            |
|`U+19F6`   | Symbol           | _null_            | _null_                     | &#x19F6; Pram-Muoy Roc       |
|`U+19F7`   | Symbol           | _null_            | _null_                     | &#x19F7; Pram-Pii Roc        |
|`U+19F8`   | Symbol           | _null_            | _null_                     | &#x19F8; Pram-Bei Roc        |
|`U+19F9`   | Symbol           | _null_            | _null_                     | &#x19F9; Pram-Buon Roc       |
|`U+19FA`   | Symbol           | _null_            | _null_                     | &#x19FA; Dap Roc             |
|`U+19FB`   | Symbol           | _null_            | _null_                     | &#x19FB; Dap-Muoy Roc        |
|`U+19FC`   | Symbol           | _null_            | _null_                     | &#x19FC; Dap-Pii Roc         |
|`U+19FD`   | Symbol           | _null_            | _null_                     | &#x19FD; Dap-Bei Roc         |
|`U+19FE`   | Symbol           | _null_            | _null_                     | &#x19FE; Dap-Buon Roc        |
|`U+19FF`   | Symbol           | _null_            | _null_                     | &#x19FF; Dap-Pram Roc        |
:::


## Miscellaneous character table ##

Other important characters that may be encountered when shaping runs
of Khmer text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2013; En dash               |
|`U+2014`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
:::


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation of a
conjunct from a "_Consonant_,Halant,_Consonant_" sequence. The sequence
"_Consonant_,Halant,ZWJ,_Consonant_" blocks the formation of a
conjunct between the two consonants. 

Note, however, that the "_Consonant_,Halant" subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead. The sequence
"_Consonant_,Halant,ZWNJ,_Consonant_" should produce the first
consonant in its standard form, followed by an explicit "Halant".

A secondary usage of the zero-width joiner is to prevent the formation of
"Reph". An initial "Ra,Halant,ZWJ" sequence should not produce a "Reph",
where an initial "Ra,Halant" sequence without the zero-width joiner
otherwise would.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display
those codepoints that are defined as non-spacing (marks, dependent
vowels (matras), below-base consonant forms, and post-base consonant
forms) in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match "NBSP,ZWJ,Halant,_Consonant_", "NBSP,_mark_", or
"NBSP,_matra_".

In addition to general punctuation, runs of Khmer text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block.


================================================
FILE: character-tables/character-tables-lao.md
================================================
# Lao character tables #

This document lists the per-character shaping information needed to
[shape Lao text](../opentype-shaping-thai-lao.md#the-thailao-shaping-model).

**Contents**

  - [Lao character table](#lao-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)


## Lao character table ##

Lao glyphs should be classified as in the following
table. Codepoints in the Lao block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column.

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as currency marks,
punctuation, and other symbols.

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process.

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


:::{table} Lao character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass | Combining class | PUA    | Glyph                         |
|:----------|:-----------------|:------------------|:------------------------|:----------------|:-------|:------------------------------|
|`U+0E80`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E81`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E81; Ko                   |
|`U+0E82`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E82; Kho Sung             |
|`U+0E83`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E84`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E84; Kho Tam              |
|`U+0E85`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E86`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E86; Pali Gha             |
|`U+0E87`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E87; Ngo                  |
|`U+0E88`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E88; Co                   |
|`U+0E89`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E89; Pali Cha             |
|`U+0E8A`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E8A; So Tam               |
|`U+0E8B`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E8C`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E8C; Pali Jha             |
|`U+0E8D`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E8D; Nyo                  |
|`U+0E8E`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E8E; Pali Nya             |
|`U+0E8F`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E8F; Pali Tta             |
| | | | | | | |
|`U+0E90`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E90; Pali Ttha            |
|`U+0E91`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E91; Pali Dda             |
|`U+0E92`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E92; Pali Ddha            |
|`U+0E93`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E93; Pali Nna             |
|`U+0E94`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E94; Do                   |
|`U+0E95`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E95; To                   |
|`U+0E96`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E96; Tho Sung             |
|`U+0E97`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E97; Tho Tam              |
|`U+0E98`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E98; Pali Dha             |
|`U+0E99`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E99; No                   |
|`U+0E9A`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E9A; Bo                   |
|`U+0E9B`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E9B; Po                   |
|`U+0E9C`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E9C; Pho Sung             |
|`U+0E9D`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E9D; Fo Tam               |
|`U+0E9E`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E9E; Pho Tam              |
|`U+0E9F`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E9F; Fo Sung              |
| | | | | | | |																      
|`U+0EA0`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EA0; Pali Bha             |
|`U+0EA1`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EA1; Mo                   |
|`U+0EA2`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EA2; Yo                   |
|`U+0EA3`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EA3; Lo Ling              |
|`U+0EA4`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EA5`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EA5; Lo Loot              |
|`U+0EA6`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EA7`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EA7; Wo                   |
|`U+0EA8`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EA8; Sanskrit Sha         |
|`U+0EA9`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EA9; Sanskrit Ssa         |
|`U+0EAA`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EAA; So Sung              |
|`U+0EAB`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EAB; Ho Sung              |
|`U+0EAC`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EAC; Pali Lla             |
|`U+0EAD`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EAD; O                    |
|`U+0EAE`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EAE; Ho Tam               |
|`U+0EAF`   | Letter           | _null_            | _null_                  | _0_             | _null_ | &#x0EAF; Ellipsis             |
| | | | | | | |																      
|`U+0EB0`   | Letter           | VOWEL_DEPENDENT   | RIGHT_POSITION          | _0_             | _null_ | &#x0EB0; Sign A               |
|`U+0EB1`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION            | _0_             | _null_ | &#x0EB1; Sign Mai Kan         |
|`U+0EB2`   | Letter           | VOWEL_DEPENDENT   | RIGHT_POSITION          | _0_             | _null_ | &#x0EB2; Sign Aa              |
|`U+0EB3`   | Letter           | VOWEL_DEPENDENT   | RIGHT_POSITION          | _0_             | _null_ | &#x0EB3; Sign Am              |
|`U+0EB4`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION            | _0_             | _null_ | &#x0EB4; Sign I               |
|`U+0EB5`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION            | _0_             | _null_ | &#x0EB5; Sign Ii              |
|`U+0EB6`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION            | _0_             | _null_ | &#x0EB6; Sign Y               |
|`U+0EB7`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION            | _0_             | _null_ | &#x0EB7; Sign Yy              |
|`U+0EB8`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION         | 118             | _null_ | &#x0EB8; Sign U               |
|`U+0EB9`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION         | 118             | _null_ | &#x0EB9; Sign Uu              |
|`U+0EBA`   | Mark [Mn]        | VIRAMA            | BOTTOM_POSITION         | 9               | _null_ | &#x0EBA; Pali Virama          |
|`U+0EBB`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION            | _0_             | _null_ | &#x0EBB; Sign Mai Kon         |
|`U+0EBC`   | Mark [Mn]        | CONSONANT_MEDIAL  | BOTTOM_POSITION         | _0_             | _null_ | &#x0EBC; Semivowel Sign Lo    |
|`U+0EBD`   | Letter           | CONSONANT_MEDIAL  | _null_                  | _0_             | _null_ | &#x0EBD; Semivowel Sign Nyo   |
|`U+0EBE`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EBF`   | _unassigned_     |                   |                         |                 |        |                               |
| | | | | | | |
|`U+0EC0`   | Letter           | VOWEL_DEPENDENT   | VISUAL_ORDER_LEFT       | _0_             | _null_ | &#x0EC0; Sign E               |
|`U+0EC1`   | Letter           | VOWEL_DEPENDENT   | VISUAL_ORDER_LEFT       | _0_             | _null_ | &#x0EC1; Sign Ei              |
|`U+0EC2`   | Letter           | VOWEL_DEPENDENT   | VISUAL_ORDER_LEFT       | _0_             | _null_ | &#x0EC2; Sign O               |
|`U+0EC3`   | Letter           | VOWEL_DEPENDENT   | VISUAL_ORDER_LEFT       | _0_             | _null_ | &#x0EC3; Sign Ay              |
|`U+0EC4`   | Letter           | VOWEL_DEPENDENT   | VISUAL_ORDER_LEFT       | _0_             | _null_ | &#x0EC4; Sign Ai              |
|`U+0EC5`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EC6`   | Letter Modifier  | _null_            | _null_                  | _0_             | _null_ | &#x0EC6; Ko La                |
|`U+0EC7`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EC8`   | Mark [Mn]        | TONE_MARKER       | TOP_POSITION            | 122             | _null_ | &#x0EC8; Tone Mai Ek          |
|`U+0EC9`   | Mark [Mn]        | TONE_MARKER       | TOP_POSITION            | 122             | _null_ | &#x0EC9; Tone Mai Tho         |
|`U+0ECA`   | Mark [Mn]        | TONE_MARKER       | TOP_POSITION            | 122             | _null_ | &#x0ECA; Tone Mai Ti          |
|`U+0ECB`   | Mark [Mn]        | TONE_MARKER       | TOP_POSITION            | 122             | _null_ | &#x0ECB; Tone Mai Catawa      |
|`U+0ECC`   | Mark [Mn]        | _null_            | TOP_POSITION            | _0_             | _null_ | &#x0ECC; Cancellation mark    |
|`U+0ECD`   | Mark [Mn]        | BINDU             | TOP_POSITION            | _0_             | _null_ | &#x0ECD; Niggahita            |
|`U+0ECE`   | Mark [Mn]        | TONE_MARKER       | TOP_POSITION            | _0_             | _null_ | &#x0ECE; Yamakkan             |
|`U+0ECF`   | _unassigned_     |                   |                         |                 |        |                               |
| | | | | | | |        														                    
|`U+0ED0`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0ED0; Digit Zero           |
|`U+0ED1`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0ED1; Digit One            |
|`U+0ED2`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0ED2; Digit Two            |
|`U+0ED3`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0ED3; Digit Three          |
|`U+0ED4`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0ED4; Digit Four           |
|`U+0ED5`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0ED5; Digit Five           |
|`U+0ED6`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0ED6; Digit Six            |
|`U+0ED7`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0ED7; Digit Seven          |
|`U+0ED8`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0ED8; Digit Eight          |
|`U+0ED9`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0ED9; Digit Nine           |
|`U+0EDA`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EDB`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EDC`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EDC; Ho No                |
|`U+0EDD`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EDD; Ho Mo                |
|`U+0EDE`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EDE; Khmu Go              |
|`U+0EDF`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0EDF; Khmu Nyo             |
| | | | | | | |
|`U+0EE0`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EE1`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EE2`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EE3`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EE4`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EE5`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EE6`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EE7`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EE8`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EE9`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EEA`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EEB`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EEC`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EED`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EEE`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EEF`   | _unassigned_     |                   |                         |                 |        |                               |
| | | | | | | |
|`U+0EF0`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EF1`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EF2`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EF3`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EF4`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EF5`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EF6`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EF7`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EF8`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EF9`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EFA`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EFB`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EFC`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EFD`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EFE`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0EFF`   | _unassigned_     |                   |                         |                 |        |                               |
:::


## Miscellaneous character table ##

Runs of Lao text typically do not insert spaces between
words. Consequently, in addition to general punctuation, the Zero-Width
Space (`U+200B`) character is often used to insert invisible break
points that may be converted to line breaks.


:::{table} Additional punctuation character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+200B`   | Separator        | PLACEHOLDER       | _null_                     | &#x200B; Zero-width space      |
:::


Other important characters that may be encountered when shaping runs
of Lao text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2013; En dash               |
|`U+2014`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
:::


================================================
FILE: character-tables/character-tables-malayalam.md
================================================
# Malayalam character tables #

This document lists the per-character shaping information needed to
[shape Malayalam text](../opentype-shaping-malayalam.md).

**Contents**

  - [Malayalam character table](#malayalam-character-table)
  - [Vedic Extensions character table](#vedic-extensions-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)
	  

## Malayalam character table ##

Malayalam glyphs should be classified as in the following
table. Codepoints in the Malayalam block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as currency marks,
punctuation, and other symbols.

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


:::{table} Malayalam character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0D00`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0D00; Combining Anusvara Above |
|`U+0D01`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0D01; Candrabindu         |
|`U+0D02`   | Mark [Mc]        | BINDU             | RIGHT_POSITION             | &#x0D02; Anusvara            |
|`U+0D03`   | Mark [Mc]        | VISARGA           | RIGHT_POSITION             | &#x0D03; Visarga             |
|`U+0D04`   | Letter           | BINDU             | _null_                     | &#x0D04; Vedic Anusvara      |
|`U+0D05`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D05; A                   |
|`U+0D06`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D06; Aa                  |
|`U+0D07`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D07; I                   |
|`U+0D08`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D08; Ii                  |
|`U+0D09`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D09; U                   |
|`U+0D0A`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D0A; Uu                  |
|`U+0D0B`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D0B; Vocalic R           |
|`U+0D0C`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D0C; Vocalic L           |
|`U+0D0D`   | _unassigned_     |                   |                            |                              |
|`U+0D0E`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D0E; E                   |
|`U+0D0F`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D0F; Ee                  |
| | | | |																		
|`U+0D10`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D10; Ai                  |
|`U+0D11`   | _unassigned_     |                   |                            |                              |
|`U+0D12`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D12; O                   |
|`U+0D13`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D13; Oo                  |
|`U+0D14`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D14; Au                  |
|`U+0D15`   | Letter           | CONSONANT         | _null_                     | &#x0D15; Ka                  |
|`U+0D16`   | Letter           | CONSONANT         | _null_                     | &#x0D16; Kha                 |
|`U+0D17`   | Letter           | CONSONANT         | _null_                     | &#x0D17; Ga                  |
|`U+0D18`   | Letter           | CONSONANT         | _null_                     | &#x0D18; Gha                 |
|`U+0D19`   | Letter           | CONSONANT         | _null_                     | &#x0D19; Nga                 |
|`U+0D1A`   | Letter           | CONSONANT         | _null_                     | &#x0D1A; Ca                  |
|`U+0D1B`   | Letter           | CONSONANT         | _null_                     | &#x0D1B; Cha                 |
|`U+0D1C`   | Letter           | CONSONANT         | _null_                     | &#x0D1C; Ja                  |
|`U+0D1D`   | Letter           | CONSONANT         | _null_                     | &#x0D1D; Jha                 |
|`U+0D1E`   | Letter           | CONSONANT         | _null_                     | &#x0D1E; Nya                 |
|`U+0D1F`   | Letter           | CONSONANT         | _null_                     | &#x0D1F; Tta                 |
| | | | |																		
|`U+0D20`   | Letter           | CONSONANT         | _null_                     | &#x0D20; Ttha                |
|`U+0D21`   | Letter           | CONSONANT         | _null_                     | &#x0D21; Dda                 |
|`U+0D22`   | Letter           | CONSONANT         | _null_                     | &#x0D22; Ddha                |
|`U+0D23`   | Letter           | CONSONANT         | _null_                     | &#x0D23; Nna                 |
|`U+0D24`   | Letter           | CONSONANT         | _null_                     | &#x0D24; Ta                  |
|`U+0D25`   | Letter           | CONSONANT         | _null_                     | &#x0D25; Tha                 |
|`U+0D26`   | Letter           | CONSONANT         | _null_                     | &#x0D26; Da                  |
|`U+0D27`   | Letter           | CONSONANT         | _null_                     | &#x0D27; Dha                 |
|`U+0D28`   | Letter           | CONSONANT         | _null_                     | &#x0D28; Na                  |
|`U+0D29`   | Letter           | CONSONANT         | _null_                     | &#x0D29; Nnna                |
|`U+0D2A`   | Letter           | CONSONANT         | _null_                     | &#x0D2A; Pa                  |
|`U+0D2B`   | Letter           | CONSONANT         | _null_                     | &#x0D2B; Pha                 |
|`U+0D2C`   | Letter           | CONSONANT         | _null_                     | &#x0D2C; Ba                  |
|`U+0D2D`   | Letter           | CONSONANT         | _null_                     | &#x0D2D; Bha                 |
|`U+0D2E`   | Letter           | CONSONANT         | _null_                     | &#x0D2E; Ma                  |
|`U+0D2F`   | Letter           | CONSONANT         | _null_                     | &#x0D2F; Ya                  |
| | | | |																		
|`U+0D30`   | Letter           | CONSONANT         | _null_                     | &#x0D30; Ra                  |
|`U+0D31`   | Letter           | CONSONANT         | _null_                     | &#x0D31; Rra                 |
|`U+0D32`   | Letter           | CONSONANT         | _null_                     | &#x0D32; La                  |
|`U+0D33`   | Letter           | CONSONANT         | _null_                     | &#x0D33; Lla                 |
|`U+0D34`   | Letter           | CONSONANT         | _null_                     | &#x0D34; Llla                |
|`U+0D35`   | Letter           | CONSONANT         | _null_                     | &#x0D35; Va                  |
|`U+0D36`   | Letter           | CONSONANT         | _null_                     | &#x0D36; Sha                 |
|`U+0D37`   | Letter           | CONSONANT         | _null_                     | &#x0D37; Ssa                 |
|`U+0D38`   | Letter           | CONSONANT         | _null_                     | &#x0D38; Sa                  |
|`U+0D39`   | Letter           | CONSONANT         | _null_                     | &#x0D39; Ha                  |
|`U+0D3A`   | Letter           | CONSONANT         | _null_                     | &#x0D3A; Ttta                |
|`U+0D3B`   | Mark [Mn]        | PURE_KILLER       | TOP_POSITION               | &#x0D3B; Vertical Bar Virama |
|`U+0D3C`   | Mark [Mn]        | PURE_KILLER       | TOP_POSITION               | &#x0D3C; Circular Virama     |
|`U+0D3D`   | Letter           | AVAGRAHA          | _null_                     | &#x0D3D; Avagraha            |
|`U+0D3E`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0D3E; Sign Aa             |
|`U+0D3F`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0D3F; Sign I              |
| | | | |																		
|`U+0D40`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0D40; Sign Ii             |
|`U+0D41`   | Mark [Mn]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0D41; Sign U              |
|`U+0D42`   | Mark [Mn]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0D42; Sign Uu             |
|`U+0D43`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0D43; Sign Vocalic R      |
|`U+0D44`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0D44; Sign Vocalic Rr     |
|`U+0D45`   | _unassigned_     |                   |                            |                              |
|`U+0D46`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x0D46; Sign E              |
|`U+0D47`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x0D47; Sign Ee             |
|`U+0D48`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x0D48; Sign Ai             |
|`U+0D49`   | _unassigned_     |                   |                            |                              |
|`U+0D4A`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_AND_RIGHT_POSITION    | &#x0D4A; Sign O              |
|`U+0D4B`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_AND_RIGHT_POSITION    | &#x0D4B; Sign Oo             |
|`U+0D4C`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_AND_RIGHT_POSITION    | &#x0D4C; Sign Au             |
|`U+0D4D`   | Mark [Mn]        | VIRAMA            | TOP_POSITION               | &#x0D4D; Virama              |
|`U+0D4E`   | Letter           | CONSONANT_PRE_REPHA| _null_                    | &#x0D4E; Dot Reph            |
|`U+0D4F`   | Symbol           | SYMBOL            | _null_                     | &#x0D4F; Para                |
| | | | |																		
|`U+0D50`   | _unassigned_     |                   |                            |                              |
|`U+0D51`   | _unassigned_     |                   |                            |                              |
|`U+0D52`   | _unassigned_     |                   |                            |                              |
|`U+0D53`   | _unassigned_     |                   |                            |                              |
|`U+0D54`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x0D54; Chillu M            |
|`U+0D55`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x0D55; Chillu Y            |
|`U+0D56`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x0D56; Chillu Lll          |
|`U+0D57`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0D57; Au Length Mark      |
|`U+0D58`   | Number           | NUMBER            | _null_                     | &#x0D58; Fraction 1/160      |
|`U+0D59`   | Number           | NUMBER            | _null_                     | &#x0D59; Fraction 1/40       |
|`U+0D5A`   | Number           | NUMBER            | _null_                     | &#x0D5A; Fraction 3/80       |
|`U+0D5B`   | Number           | NUMBER            | _null_                     | &#x0D5B; Fraction 1/20       |
|`U+0D5C`   | Number           | NUMBER            | _null_                     | &#x0D5C; Fraction 1/10       |
|`U+0D5D`   | Number           | NUMBER            | _null_                     | &#x0D5D; Fraction 3/20       |
|`U+0D5E`   | Number           | NUMBER            | _null_                     | &#x0D5E; Fraction 1/5        |
|`U+0D5F`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D5F; Archaic Ii          |
| | | | |																		
|`U+0D60`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D60; Vocalic Rr          |
|`U+0D61`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D61; Vocalic Ll          |
|`U+0D62`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0D62; Sign Vocalic L      |
|`U+0D63`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0D63; Sign Vocalic Ll     |
|`U+0D64`   | _unassigned_     |                   |                            |                              |
|`U+0D65`   | _unassigned_     |                   |                            |                              |
|`U+0D66`   | Number           | NUMBER            | _null_                     | &#x0D66; Digit Zero          |
|`U+0D67`   | Number           | NUMBER            | _null_                     | &#x0D67; Digit One           |
|`U+0D68`   | Number           | NUMBER            | _null_                     | &#x0D68; Digit Two           |
|`U+0D69`   | Number           | NUMBER            | _null_                     | &#x0D69; Digit Three         |
|`U+0D6A`   | Number           | NUMBER            | _null_                     | &#x0D6A; Digit Four          |
|`U+0D6B`   | Number           | NUMBER            | _null_                     | &#x0D6B; Digit Five          |
|`U+0D6C`   | Number           | NUMBER            | _null_                     | &#x0D6C; Digit Six           |
|`U+0D6D`   | Number           | NUMBER            | _null_                     | &#x0D6D; Digit Seven         |
|`U+0D6E`   | Number           | NUMBER            | _null_                     | &#x0D6E; Digit Eight         |
|`U+0D6F`   | Number           | NUMBER            | _null_                     | &#x0D6F; Digit Nine          |
| | | | |																		
|`U+0D70`   | Number           | NUMBER            | _null_                     | &#x0D70; Number Ten          |
|`U+0D71`   | Number           | NUMBER            | _null_                     | &#x0D71; Number One Hundred  |
|`U+0D72`   | Number           | NUMBER            | _null_                     | &#x0D72; Number One Thousand |
|`U+0D73`   | Number           | NUMBER            | _null_                     | &#x0D73; Fraction 1/4        |
|`U+0D74`   | Number           | NUMBER            | _null_                     | &#x0D74; Fraction 1/2        |
|`U+0D75`   | Number           | NUMBER            | _null_                     | &#x0D75; Fraction 3/4        |
|`U+0D76`   | Number           | NUMBER            | _null_                     | &#x0D76; Fraction 1/16       |
|`U+0D77`   | Number           | NUMBER            | _null_                     | &#x0D77; Fraction 1/8        |
|`U+0D78`   | Number           | NUMBER            | _null_                     | &#x0D78; Fraction 3/16       |
|`U+0D79`   | Symbol           | SYMBOL            | _null_                     | &#x0D79; Date Mark           |
|`U+0D7A`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x0D7A; Chillu Nn           |
|`U+0D7B`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x0D7B; Chillu N            |
|`U+0D7C`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x0D7C; Chillu Rr           |
|`U+0D7D`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x0D7D; Chillu L            |
|`U+0D7E`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x0D7E; Chillu Ll           |
|`U+0D7F`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x0D7F; Chillu K            |
:::


## Vedic Extensions character table ##

Sanskrit runs written in the Malayalam script may also include
characters from the Vedic Extensions block. These characters should be
classified as follows.

> Note: See the [Vedic Extensions](../opentype-shaping-vedic-extensions.md) 
> document for additional information.


:::{table} Vedic Extensions character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1CD0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD0; Tone Karshana       |
|`U+1CD1`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD1; Tone Shara          |
|`U+1CD2`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD2; Tone Prenkha        |
|`U+1CD3`   | Punctuation      | _null_            | _null_                     | &#x1CD3; Sign Nihshvasa      |
|`U+1CD4`   | Mark [Mn]        | CANTILLATION      | OVERSTRUCK                 | &#x1CD4; Tone Midline Svarita |
|`U+1CD5`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD5; Tone Aggravated Independent Svarita |
|`U+1CD6`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD6; Tone Independent Svarita |
|`U+1CD7`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD7; Tone Kathaka Independent Svarita |
|`U+1CD8`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD8; Tone Candra Below   |
|`U+1CD9`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD9; Tone Kathaka Independent Svarita Schroeder |
|`U+1CDA`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDA; Tone Double Svarita |
|`U+1CDB`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDB; Tone Triple Svarita |
|`U+1CDC`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDC; Tone Kathaka Anudatta |
|`U+1CDD`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDD; Tone Dot Below      |
|`U+1CDE`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDE; Tone Two Dots Below |
|`U+1CDF`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDF; Tone Three Dots Below |
| | | | |																		
|`U+1CE0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CE0; Tone Rigvedic Kashmiri Independent Svarita |
|`U+1CE1`   | Mark [Mc]        | CANTILLATION      | RIGHT_POSITION             | &#x1CE1; Tone Atharavedic Independent Svarita |
|`U+1CE2`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE2; Sign Visarga Svarita |
|`U+1CE3`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE3; Sign Visarga Udatta |
|`U+1CE4`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE4; Sign Reversed Visarga Udatta |
|`U+1CE5`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE5; Sign Visarga Anudatta |
|`U+1CE6`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE6; Sign Reversed Visarga Anudatta |
|`U+1CE7`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE7; Sign Visarga Udatta With Tail |
|`U+1CE8`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE8; Sign Visarga Anudatta With Tail |
|`U+1CE9`   | Letter           | SYMBOL            | _null_                     | &#x1CE9; Sign Anusvara Antargomukha |
|`U+1CEA`   | Letter           | _null_            | _null_                     | &#x1CEA; Sign Anusvara Bahirgomukha |
|`U+1CEB`   | Letter           | _null_            | _null_                     | &#x1CEB; Sign Anusvara Vamagomukha |
|`U+1CEC`   | Letter           | SYMBOL            | _null_                     | &#x1CEC; Sign Anusvara Vamagomukha With Tail |
|`U+1CED`   | Mark [Mn]        | AVAGRAHA          | BOTTOM_POSITION            | &#x1CED; Sign Tiryak         |
|`U+1CEE`   | Letter           | SYMBOL            | _null_                     | &#x1CEE; Sign Hexiform Long Anusvara |
|`U+1CEF`   | Letter           | _null_            | _null_                     | &#x1CEF; Sign Long Anusvara  |
| | | | |																		
|`U+1CF0`   | Letter           | _null_            | _null_                     | &#x1CF0; Sign Rthang Long Anusvara |
|`U+1CF1`   | Letter           | SYMBOL            | _null_                     | &#x1CF1; Sign Anusvara Ubhayato Mukha |
|`U+1CF2`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF2; Sign Ardhavisarga   |
|`U+1CF3`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF3; Sign Rotated Ardhavisarga |
|`U+1CF4`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CF4; Tone Candra Above   |
|`U+1CF5`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF5; Sign Jihvamuliya    |
|`U+1CF6`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF6; Sign Upadhmaniya    |
|`U+1CF7`   | Mark [Mc]        | _null_            | _null_                     | &#x1CF7; Sign Atikrama       |
|`U+1CF8`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CF8; Tone Ring Above     |
|`U+1CF9`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CF9; Tone Double Ring Above |
|`U+1CFA`   | Letter           | PLACEHOLDER       | _null_                     | &#x1CFA; Sign Double Anusvara Antargomukha |
|`U+1CFB`   | _unassigned_     |                   |                            |                              |
|`U+1CFC`   | _unassigned_     |                   |                            |                              |
|`U+1CFD`   | _unassigned_     |                   |                            |                              |
|`U+1CFE`   | _unassigned_     |                   |                            |                              |
|`U+1CFF`   | _unassigned_     |                   |                            |                              |
:::


## Miscellaneous character table ##

In addition to general punctuation, runs of Malayalam text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block. Malayalam text can also incorporate the udatta
(`U+0951`) and anudatta (`U+0952`) signs from the Devanagari block.


:::{table} Additional punctuation character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+0951`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x0951; Udatta              |
|`U+0952`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x0952; Anudatta            |
|`U+0964`   | Punctuation      | _null_            | _null_                     | &#x0964; Danda               |
|`U+0965`   | Punctuation      | _null_            | _null_                     | &#x0965; Double Danda        |
:::


Other important characters that may be encountered when shaping runs
of Malayalam text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2013; En dash               |
|`U+2014`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
:::


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a "_Consonant_,Halant,_Consonant_" sequence. The
sequence "_Consonant_,Halant,ZWJ,_Consonant_" blocks the formation of
a conjunct between the two consonants. 

Note, however, that the "_Consonant_,Halant" subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead. The
sequence "_Consonant_,Halant,ZWNJ,_Consonant_" should produce the
first consonant in its standard form, followed by an explicit
"Halant".

A secondary usage of the zero-width joiner is to prevent the formation of
"Reph". An initial "Ra,Halant,ZWJ" sequence should not produce a "Reph",
where an initial "Ra,Halant" sequence without the zero-width joiner
otherwise would.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display
those codepoints that are defined as non-spacing (marks, dependent
vowels (matras), below-base consonant forms, and post-base consonant
forms) in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match "NBSP,ZWJ,Halant,_Consonant_", "NBSP,_mark_", or "NBSP,_matra_".


================================================
FILE: character-tables/character-tables-mongolian.md
================================================
# Mongolian character tables #

This document lists the per-character shaping information needed to
[shape Mongolian text](../opentype-shaping-mongolian.md).

**Contents**

  - [Mongolian character table](#mongolian-character-table)
  - [Mongolian Supplement character table](#mongolian-supplement-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)


## Mongolian character table ##

Mongolian glyphs should be classified as in the following
table. Codepoints in the Mongolian block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column.

The _Joining type_ column indicates whether each codepoint is defined
as joining with adjacent characters on the left side, right side, left
and right sides ("DUAL"), or neither side ("NON_JOINING"). Codepoints
designated TRANSPARENT in the _Joining type_ column do not join with
adjacent characters and, in addition, do not affect the joining
behavior of surrounding characters. Non-spacing marks are of type
TRANSPARENT. Codepoints designated JOIN_CAUSING force adjacent
characters to join.

The _Joining group_ column lists the fundamental letter that the
listed codepoint behaves like for joining purposes.

Assigned codepoints with a _null_ in the _Joining group_
column evoke no special behavior from the shaping engine during the
join-computation stage.

The _Mark class_ column indicates the Canonical Combining Class
for the codepoint.  Marks are assigned non-zero combining classes so
that sequences of adjacent marks can be reordered as required by the
orthography. 

For Mongolian, a subset of marks in the 220 and 230 classes is also
designated as _Modifier Combining Marks_ (<abbr>MCM</abbr>). These are denoted with
_220_MCM_ and _230_MCM_ in the _Mark class_ column. <abbr title="Modifier Combining Mark">MCM</abbr> codepoints are
treated differently during the mark-reordering stage.


:::{table} Mongolian character table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                                         |
|:----------|:-----------------|:-------------|:---------------------|:-----------|-----------------------------------------------|
|`U+1800`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x1800; Mongolian Birga                      |
|`U+1801`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x1801; Mongolian Ellipsis                   |
|`U+1802`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x1802; Mongolian Comma                      |
|`U+1803`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x1803; Mongolian Full Stop                  |
|`U+1804`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x1804; Mongolian Colon                      |
|`U+1805`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x1805; Mongolian Four Dots                  |
|`U+1806`   | Punctuation [Pd] | NON_JOINING  | _null_               | _0_        | &#x1806; Todo Soft Hyphen                     |
|`U+1807`   | Punctuation      | DUAL         | _null_               | _0_        | &#x1807; Sibe Syllable Boundary Mark          |
|`U+1808`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x1808; Manchu Comma                         |
|`U+1809`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x1809; Manchu Full Stop                     |
|`U+180A`   | Punctuation      | JOIN_CAUSING | _null_               | _0_        | &#x180A; Mongolian Nirugu                     |
|`U+180B`   | Mark [Mn]        | TRANSPARENT  | _null_               | _0_        | &#x180B; Free Variation Selector One          |
|`U+180C`   | Mark [Mn]        | TRANSPARENT  | _null_               | _0_        | &#x180C; Free Variation Selector Two          |
|`U+180D`   | Mark [Mn]        | TRANSPARENT  | _null_               | _0_        | &#x180D; Free Variation Selector Three        |
|`U+180E`   | Formatting       | NON_JOINING  | _null_               | _0_        | &#x180E; Mongolian Vowel Separator            |
|`U+180F`   | Mark [Mn]        | TRANSPARENT  | _null_               | _0_        | &#x180f; Free Variation Selector Four         |
| | | | | |                                                                                  
|`U+1810`   | Number           | NON_JOINING  | _null_               | _0_        | &#x1810; Digit Zero                           |
|`U+1811`   | Number           | NON_JOINING  | _null_               | _0_        | &#x1811; Digit One                            |
|`U+1812`   | Number           | NON_JOINING  | _null_               | _0_        | &#x1812; Digit Two                            |
|`U+1813`   | Number           | NON_JOINING  | _null_               | _0_        | &#x1813; Digit Three                          |
|`U+1814`   | Number           | NON_JOINING  | _null_               | _0_        | &#x1814; Digit Four                           |
|`U+1815`   | Number           | NON_JOINING  | _null_               | _0_        | &#x1815; Digit Five                           |
|`U+1816`   | Number           | NON_JOINING  | _null_               | _0_        | &#x1816; Digit Six                            |
|`U+1817`   | Number           | NON_JOINING  | _null_               | _0_        | &#x1817; Digit Seven                          |
|`U+1818`   | Number           | NON_JOINING  | _null_               | _0_        | &#x1818; Digit Eight                          |
|`U+1819`   | Number           | NON_JOINING  | _null_               | _0_        | &#x1819; Digit Nine                           |
|`U+181A`   | _unassigned_     |              |                      |            |                                               |
|`U+181B`   | _unassigned_     |              |                      |            |                                               |
|`U+181C`   | _unassigned_     |              |                      |            |                                               |
|`U+181D`   | _unassigned_     |              |                      |            |                                               |
|`U+181E`   | _unassigned_     |              |                      |            |                                               |
|`U+181F`   | _unassigned_     |              |                      |            |                                               |
| | | | | |                                                                                  
|`U+1820`   | Letter           | DUAL         | _null_               | _0_        | &#x1820; A                                    |
|`U+1821`   | Letter           | DUAL         | _null_               | _0_        | &#x1821; E                                    |
|`U+1822`   | Letter           | DUAL         | _null_               | _0_        | &#x1822; I                                    |
|`U+1823`   | Letter           | DUAL         | _null_               | _0_        | &#x1823; O                                    |
|`U+1824`   | Letter           | DUAL         | _null_               | _0_        | &#x1824; U                                    |
|`U+1825`   | Letter           | DUAL         | _null_               | _0_        | &#x1825; Oe                                   |
|`U+1826`   | Letter           | DUAL         | _null_               | _0_        | &#x1826; Ue                                   |
|`U+1827`   | Letter           | DUAL         | _null_               | _0_        | &#x1827; Ee                                   |
|`U+1828`   | Letter           | DUAL         | _null_               | _0_        | &#x1828; Na                                   |
|`U+1829`   | Letter           | DUAL         | _null_               | _0_        | &#x1829; Ang                                  |
|`U+182A`   | Letter           | DUAL         | _null_               | _0_        | &#x182A; Ba                                   |
|`U+182B`   | Letter           | DUAL         | _null_               | _0_        | &#x182B; Pa                                   |
|`U+182C`   | Letter           | DUAL         | _null_               | _0_        | &#x182C; Qa                                   |
|`U+182D`   | Letter           | DUAL         | _null_               | _0_        | &#x182D; Ga                                   |
|`U+182E`   | Letter           | DUAL         | _null_               | _0_        | &#x182E; Ma                                   |
|`U+182F`   | Letter           | DUAL         | _null_               | _0_        | &#x182F; La                                   |
| | | | | |                                                                                  
|`U+1830`   | Letter           | DUAL         | _null_               | _0_        | &#x1830; Sa                                   |
|`U+1831`   | Letter           | DUAL         | _null_               | _0_        | &#x1831; Sha                                  |
|`U+1832`   | Letter           | DUAL         | _null_               | _0_        | &#x1832; Ta                                   |
|`U+1833`   | Letter           | DUAL         | _null_               | _0_        | &#x1833; Da                                   |
|`U+1834`   | Letter           | DUAL         | _null_               | _0_        | &#x1834; Cha                                  |
|`U+1835`   | Letter           | DUAL         | _null_               | _0_        | &#x1835; Ja                                   |
|`U+1836`   | Letter           | DUAL         | _null_               | _0_        | &#x1836; Ya                                   |
|`U+1837`   | Letter           | DUAL         | _null_               | _0_        | &#x1837; Ra                                   |
|`U+1838`   | Letter           | DUAL         | _null_               | _0_        | &#x1838; Wa                                   |
|`U+1839`   | Letter           | DUAL         | _null_               | _0_        | &#x1839; Fa                                   |
|`U+183A`   | Letter           | DUAL         | _null_               | _0_        | &#x183A; Ka                                   |
|`U+183B`   | Letter           | DUAL         | _null_               | _0_        | &#x183B; Kha                                  |
|`U+183C`   | Letter           | DUAL         | _null_               | _0_        | &#x183C; Tsa                                  |
|`U+183D`   | Letter           | DUAL         | _null_               | _0_        | &#x183D; Za                                   |
|`U+183E`   | Letter           | DUAL         | _null_               | _0_        | &#x183E; Haa                                  |
|`U+183F`   | Letter           | DUAL         | _null_               | _0_        | &#x183F; Zra                                  |
| | | | | |                                                                                  
|`U+1840`   | Letter           | DUAL         | _null_               | _0_        | &#x1840; Lha                                  |
|`U+1841`   | Letter           | DUAL         | _null_               | _0_        | &#x1841; Zhi                                  |
|`U+1842`   | Letter           | DUAL         | _null_               | _0_        | &#x1842; Chi                                  |
|`U+1843`   | Letter           | DUAL         | _null_               | _0_        | &#x1843; Todo Long Vowel Sign                 |
|`U+1844`   | Letter           | DUAL         | _null_               | _0_        | &#x1844; Todo E                               |
|`U+1845`   | Letter           | DUAL         | _null_               | _0_        | &#x1845; Todo I                               |
|`U+1846`   | Letter           | DUAL         | _null_               | _0_        | &#x1846; Todo O                               |
|`U+1847`   | Letter           | DUAL         | _null_               | _0_        | &#x1847; Todo U                               |
|`U+1848`   | Letter           | DUAL         | _null_               | _0_        | &#x1848; Todo Oe                              |
|`U+1849`   | Letter           | DUAL         | _null_               | _0_        | &#x1849; Todo Ue                              |
|`U+184A`   | Letter           | DUAL         | _null_               | _0_        | &#x184A; Todo Ang                             |
|`U+184B`   | Letter           | DUAL         | _null_               | _0_        | &#x184B; Todo Ba                              |
|`U+184C`   | Letter           | DUAL         | _null_               | _0_        | &#x184C; Todo Pa                              |
|`U+184D`   | Letter           | DUAL         | _null_               | _0_        | &#x184D; Todo Qa                              |
|`U+184E`   | Letter           | DUAL         | _null_               | _0_        | &#x184E; Todo Ga                              |
|`U+184F`   | Letter           | DUAL         | _null_               | _0_        | &#x184F; Todo Ma                              |
| | | | | |                                                                                      
|`U+1850`   | Letter           | DUAL         | _null_               | _0_        | &#x1850; Todo Ta                              |
|`U+1851`   | Letter           | DUAL         | _null_               | _0_        | &#x1851; Todo Da                              |
|`U+1852`   | Letter           | DUAL         | _null_               | _0_        | &#x1852; Todo Cha                             |
|`U+1853`   | Letter           | DUAL         | _null_               | _0_        | &#x1853; Todo Ja                              |
|`U+1854`   | Letter           | DUAL         | _null_               | _0_        | &#x1854; Todo Tsa                             |
|`U+1855`   | Letter           | DUAL         | _null_               | _0_        | &#x1855; Todo Ya                              |
|`U+1856`   | Letter           | DUAL         | _null_               | _0_        | &#x1856; Todo Wa                              |
|`U+1857`   | Letter           | DUAL         | _null_               | _0_        | &#x1857; Todo Ka                              |
|`U+1858`   | Letter           | DUAL         | _null_               | _0_        | &#x1858; Todo Gaa                             |
|`U+1859`   | Letter           | DUAL         | _null_               | _0_        | &#x1859; Todo Haa                             |
|`U+185A`   | Letter           | DUAL         | _null_               | _0_        | &#x185A; Todo Jia                             |
|`U+185B`   | Letter           | DUAL         | _null_               | _0_        | &#x185B; Todo Nia                             |
|`U+185C`   | Letter           | DUAL         | _null_               | _0_        | &#x185C; Todo Dza                             |
|`U+185D`   | Letter           | DUAL         | _null_               | _0_        | &#x185D; Sibe E                               |
|`U+185E`   | Letter           | DUAL         | _null_               | _0_        | &#x185E; Sibe I                               |
|`U+185F`   | Letter           | DUAL         | _null_               | _0_        | &#x185F; Sibe Iy                              |
| | | | | |                                                                                     
|`U+1860`   | Letter           | DUAL         | _null_               | _0_        | &#x1860; Sibe Ue                              |
|`U+1861`   | Letter           | DUAL         | _null_               | _0_        | &#x1861; Sibe U                               |
|`U+1862`   | Letter           | DUAL         | _null_               | _0_        | &#x1862; Sibe Ang                             |
|`U+1863`   | Letter           | DUAL         | _null_               | _0_        | &#x1863; Sibe Ka                              |
|`U+1864`   | Letter           | DUAL         | _null_               | _0_        | &#x1864; Sibe Ga                              |
|`U+1865`   | Letter           | DUAL         | _null_               | _0_        | &#x1865; Sibe Ha                              |
|`U+1866`   | Letter           | DUAL         | _null_               | _0_        | &#x1866; Sibe Pa                              |
|`U+1867`   | Letter           | DUAL         | _null_               | _0_        | &#x1867; Sibe Sha                             |
|`U+1868`   | Letter           | DUAL         | _null_               | _0_        | &#x1868; Sibe Ta                              |
|`U+1869`   | Letter           | DUAL         | _null_               | _0_        | &#x1869; Sibe Da                              |
|`U+186A`   | Letter           | DUAL         | _null_               | _0_        | &#x186A; Sibe Ja                              |
|`U+186B`   | Letter           | DUAL         | _null_               | _0_        | &#x186B; Sibe Fa                              |
|`U+186C`   | Letter           | DUAL         | _null_               | _0_        | &#x186C; Sibe Gaa                             |
|`U+186D`   | Letter           | DUAL         | _null_               | _0_        | &#x186D; Sibe Haa                             |
|`U+186E`   | Letter           | DUAL         | _null_               | _0_        | &#x186E; Sibe Tsa                             |
|`U+186F`   | Letter           | DUAL         | _null_               | _0_        | &#x186F; Sibe Za                              |
| | | | | |                                                                                      
|`U+1870`   | Letter           | DUAL         | _null_               | _0_        | &#x1870; Sibe Raa                             |
|`U+1871`   | Letter           | DUAL         | _null_               | _0_        | &#x1871; Sibe Cha                             |
|`U+1872`   | Letter           | DUAL         | _null_               | _0_        | &#x1872; Sibe Zha                             |
|`U+1873`   | Letter           | DUAL         | _null_               | _0_        | &#x1873; Manchu I                             |
|`U+1874`   | Letter           | DUAL         | _null_               | _0_        | &#x1874; Manchu Ka                            |
|`U+1875`   | Letter           | DUAL         | _null_               | _0_        | &#x1875; Manchu Ra                            |
|`U+1876`   | Letter           | DUAL         | _null_               | _0_        | &#x1876; Manchu Fa                            |
|`U+1877`   | Letter           | DUAL         | _null_               | _0_        | &#x1877; Manchu Zha                           |
|`U+1878`   | Letter           | DUAL         | _null_               | _0_        | &#x1878; Cha With Two Dots                    |
|`U+1879`   | _unassigned_     |              |                      |            |                                               |
|`U+187A`   | _unassigned_     |              |                      |            |                                               |
|`U+187B`   | _unassigned_     |              |                      |            |                                               |
|`U+187C`   | _unassigned_     |              |                      |            |                                               |
|`U+187D`   | _unassigned_     |              |                      |            |                                               |
|`U+187E`   | _unassigned_     |              |                      |            |                                               |
|`U+187F`   | _unassigned_     |              |                      |            |                                               |
| | | | | |                                                                                  
|`U+1880`   | Letter           | NON_JOINING  | _null_               | _0_        | &#x1880; Ali Gali Anusvara One                |
|`U+1881`   | Letter           | NON_JOINING  | _null_               | _0_        | &#x1881; Ali Gali Visarga One                 |
|`U+1882`   | Letter           | NON_JOINING  | _null_               | _0_        | &#x1882; Ali Gali Damaru                      |
|`U+1883`   | Letter           | NON_JOINING  | _null_               | _0_        | &#x1883; Ali Gali Ubadama                     |
|`U+1884`   | Letter           | NON_JOINING  | _null_               | _0_        | &#x1884; Ali Gali Inverted Ubadama            |
|`U+1885`   | Mark [Mn]        | TRANSPARENT  | _null_               | _0_        | &#x1885; Ali Gali Baluda                      |
|`U+1886`   | Mark [Mn]        | TRANSPARENT  | _null_               | _0_        | &#x1886; Ali Gali Three Baluda                |
|`U+1887`   | Letter           | DUAL         | _null_               | _0_        | &#x1887; Ali Gali A                           |
|`U+1888`   | Letter           | DUAL         | _null_               | _0_        | &#x1888; Ali Gali I                           |
|`U+1889`   | Letter           | DUAL         | _null_               | _0_        | &#x1889; Ali Gali Ka                          |
|`U+188A`   | Letter           | DUAL         | _null_               | _0_        | &#x188A; Ali Gali Nga                         |
|`U+188B`   | Letter           | DUAL         | _null_               | _0_        | &#x188B; Ali Gali Ca                          |
|`U+188C`   | Letter           | DUAL         | _null_               | _0_        | &#x188C; Ali Gali Tta                         |
|`U+188D`   | Letter           | DUAL         | _null_               | _0_        | &#x188D; Ali Gali Ttha                        |
|`U+188E`   | Letter           | DUAL         | _null_               | _0_        | &#x188E; Ali Gali Dda                         |
|`U+188F`   | Letter           | DUAL         | _null_               | _0_        | &#x188F; Ali Gali Nna                         |
| | | | | |                                                                                          
|`U+1890`   | Letter           | DUAL         | _null_               | _0_        | &#x1890; Ali Gali Ta                          |
|`U+1891`   | Letter           | DUAL         | _null_               | _0_        | &#x1891; Ali Gali Da                          |
|`U+1892`   | Letter           | DUAL         | _null_               | _0_        | &#x1892; Ali Gali Pa                          |
|`U+1893`   | Letter           | DUAL         | _null_               | _0_        | &#x1893; Ali Gali Pha                         |
|`U+1894`   | Letter           | DUAL         | _null_               | _0_        | &#x1894; Ali Gali Ssa                         |
|`U+1895`   | Letter           | DUAL         | _null_               | _0_        | &#x1895; Ali Gali Zha                         |
|`U+1896`   | Letter           | DUAL         | _null_               | _0_        | &#x1896; Ali Gali Za                          |
|`U+1897`   | Letter           | DUAL         | _null_               | _0_        | &#x1897; Ali Gali Ah                          |
|`U+1898`   | Letter           | DUAL         | _null_               | _0_        | &#x1898; Todo Ali Gali Ta                     |
|`U+1899`   | Letter           | DUAL         | _null_               | _0_        | &#x1899; Todo Ali Gali Zha                    |
|`U+189A`   | Letter           | DUAL         | _null_               | _0_        | &#x189A; Manchu Ali Gali Gha                  |
|`U+189B`   | Letter           | DUAL         | _null_               | _0_        | &#x189B; Manchu Ali Gali Nga                  |
|`U+189C`   | Letter           | DUAL         | _null_               | _0_        | &#x189C; Manchu Ali Gali Ca                   |
|`U+189D`   | Letter           | DUAL         | _null_               | _0_        | &#x189D; Manchu Ali Gali Jha                  |
|`U+189E`   | Letter           | DUAL         | _null_               | _0_        | &#x189E; Manchu Ali Gali Tta                  |
|`U+189F`   | Letter           | DUAL         | _null_               | _0_        | &#x189F; Manchu Ali Gali Ddha                 |
| | | | | |                                                                                                  
|`U+18A0`   | Letter           | DUAL         | _null_               | _0_        | &#x18A0; Manchu Ali Gali Ta                   |
|`U+18A1`   | Letter           | DUAL         | _null_               | _0_        | &#x18A1; Manchu Ali Gali Dha                  |
|`U+18A2`   | Letter           | DUAL         | _null_               | _0_        | &#x18A2; Manchu Ali Gali Ssa                  |
|`U+18A3`   | Letter           | DUAL         | _null_               | _0_        | &#x18A3; Manchu Ali Gali Cya                  |
|`U+18A4`   | Letter           | DUAL         | _null_               | _0_        | &#x18A4; Manchu Ali Gali Zha                  |
|`U+18A5`   | Letter           | DUAL         | _null_               | _0_        | &#x18A5; Manchu Ali Gali Za                   |
|`U+18A6`   | Letter           | DUAL         | _null_               | _0_        | &#x18A6; Ali Gali Half U                      |
|`U+18A7`   | Letter           | DUAL         | _null_               | _0_        | &#x18A7; Ali Gali Half Ya                     |
|`U+18A8`   | Letter           | DUAL         | _null_               | _0_        | &#x18A8; Manchu Ali Gali Bha                  |
|`U+18A9`   | Mark [Mn]        | TRANSPARENT  | _null_               | 228        | &#x18A9; Ali Gali Dagalga                     |
|`U+18AA`   | Letter           | DUAL         | _null_               | _0_        | &#x18AA; Manchu Ali Gali Lha                  |
|`U+18AB`   | _unassigned_     |              |                      |            |                                               |
|`U+18AC`   | _unassigned_     |              |                      |            |                                               |
|`U+18AD`   | _unassigned_     |              |                      |            |                                               |
|`U+18AE`   | _unassigned_     |              |                      |            |                                               |
|`U+18AF`   | _unassigned_     |              |                      |            |                                               |
:::


## Mongolian Supplement character table ##

The Mongolian Supplement block includes variants of the _birga_ mark
used to denote the beginning of a text.

:::{table} Mongolian Supplement character table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                                         |
|:----------|:-----------------|:-------------|:---------------------|:-----------|-----------------------------------------------|
|`U+11660`  | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x11660; Birga with Ornament                 |
|`U+11661`  | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x11661; Rotated Birga                       |
|`U+11662`  | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x11662; Double Birga with Ornament          |
|`U+11663`  | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x11663; Triple Birga with Ornament          |
|`U+11664`  | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x11664; Birga with Double Ornament          |
|`U+11665`  | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x11665; Rotated Birga with Ornament         |
|`U+11666`  | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x11666; Rotated Birga with Double Ornament  |
|`U+11667`  | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x11667; Inverted Birga                      |
|`U+11668`  | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x11668; Inverted Birga with Double Ornament |
|`U+11669`  | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x11669; Swirl Birga                         |
|`U+1166A`  | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x1166A; Swirl Birga with Ornament           |
|`U+1166B`  | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x1166B; Swirl Birga with Double Ornament    |
|`U+1166C`  | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x1166C; Turned Swirl Birga with Double Ornament|
|`U+1166D`  | _unassigned_     |              |                      |            |                                               |
|`U+1166E`  | _unassigned_     |              |                      |            |                                               |
|`U+1166F`  | _unassigned_     |              |                      |            |                                               |
| | | | | |
|`U+11670`  | _unassigned_     |              |                      |            |                                               |
|`U+11671`  | _unassigned_     |              |                      |            |                                               |
|`U+11672`  | _unassigned_     |              |                      |            |                                               |
|`U+11673`  | _unassigned_     |              |                      |            |                                               |
|`U+11674`  | _unassigned_     |              |                      |            |                                               |
|`U+11675`  | _unassigned_     |              |                      |            |                                               |
|`U+11676`  | _unassigned_     |              |                      |            |                                               |
|`U+11677`  | _unassigned_     |              |                      |            |                                               |
|`U+11678`  | _unassigned_     |              |                      |            |                                               |
|`U+11679`  | _unassigned_     |              |                      |            |                                               |
|`U+1167A`  | _unassigned_     |              |                      |            |                                               |
|`U+1167B`  | _unassigned_     |              |                      |            |                                               |
|`U+1167C`  | _unassigned_     |              |                      |            |                                               |
|`U+1167D`  | _unassigned_     |              |                      |            |                                               |
|`U+1167E`  | _unassigned_     |              |                      |            |                                               |
|`U+1167F`  | _unassigned_     |              |                      |            |                                               |
:::


## Miscellaneous character table ##

Other important characters that may be encountered when shaping runs
of Mongolian text include the dotted-circle placeholder (`U+25CC`), the
combining grapheme joiner (`U+034F`), the zero-width joiner (`U+200D`)
and zero-width non-joiner (`U+200C`), the left-to-right text marker
(`U+200E`) and right-to-left text marker (`U+200F`), the no-break
space (`U+00A0`), and the narrow no-break space (`U+202F`).

The dotted-circle placeholder is frequently used when displaying a
combining mark in isolation. Real-world text may also use other
characters, such as hyphens or dashes, in a similar placeholder
fashion; shaping engines should cope with this situation gracefully.

:::{table} Miscellaneous character table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                          |
|:----------|:-----------------|:-------------|:---------------------|:-----------|--------------------------------|
|`U+00A0`   | Separator        | NON_JOINING  | _null_               | _0_        | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINING  | _null_               | _0_        | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOIN_CAUSING | _null_               | _0_        | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2013; En dash               |
|`U+2014`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2014; Em dash               |
|`U+202F`   | Separator        | NON_JOINING  | _null_               | _0_        | &#x202F; Narrow no-break space |
|`U+25CC`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x25CC; Dotted circle         |
| | | | | | |
:::


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to force the usage of the
cursive connecting form of a letter even when the context of the
adjoining letters would not trigger the connecting form. 

For example, to show the initial form of a letter in isolation (such
as for displaying it in a table of forms), the sequence "_Letter_,ZWJ"
would be used. To show the medial form of a letter in isolation, the
sequence "ZWJ,_Letter_,ZWJ" would be used.


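<!--- Zero-Width Non Joiner explanation --->

The zero-width non-joiner (<abbr>ZWNJ</abbr>) is primarily used to
prevent a cursive connection between two adjacent letters that would
otherwise join.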

The no-break space is primarily used to display those codepoints that
are defined as non-spacing (such as vowel or diacritical marks) in an
isolated context, as an alternative to displaying them superimposed on
the dotted-circle placeholder.

The narrow no-break space is used in Mongolian to insert a small gap
between a word and its suffix. 


================================================
FILE: character-tables/character-tables-myanmar.md
================================================
# Myanmar character tables #

This document lists the per-character shaping information needed to
[shape Myanmar text](../opentype-shaping-myanmar.md).

**Contents**

  - [Myanmar character table](#myanmar-character-table)
  - [Myanmar Extended character tables](#myanmar-extended-character-tables)
  - [Vedic Extensions character table](#vedic-extensions-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)
	  

## Myanmar character table ##

Myanmar glyphs should be classified as in the following
table. Codepoints in the Myanmar block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as currency marks,
punctuation, and other symbols.

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


:::{table} Myanmar character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1000`   | Letter           | CONSONANT         | _null_                     | &#x1000; Ka                  |
|`U+1001`   | Letter           | CONSONANT         | _null_                     | &#x1001; Kha                 |
|`U+1002`   | Letter           | CONSONANT         | _null_                     | &#x1002; Ga                  |
|`U+1003`   | Letter           | CONSONANT         | _null_                     | &#x1003; Gha                 |
|`U+1004`   | Letter           | CONSONANT         | _null_                     | &#x1004; Nga                 |
|`U+1005`   | Letter           | CONSONANT         | _null_                     | &#x1005; Ca                  |
|`U+1006`   | Letter           | CONSONANT         | _null_                     | &#x1006; Cha                 |
|`U+1007`   | Letter           | CONSONANT         | _null_                     | &#x1007; Ja                  |
|`U+1008`   | Letter           | CONSONANT         | _null_                     | &#x1008; Jha                 |
|`U+1009`   | Letter           | CONSONANT         | _null_                     | &#x1009; Nya                 |
|`U+100A`   | Letter           | CONSONANT         | _null_                     | &#x100A; Nnya                |
|`U+100B`   | Letter           | CONSONANT         | _null_                     | &#x100B; Tta                 |
|`U+100C`   | Letter           | CONSONANT         | _null_                     | &#x100C; Ttha                |
|`U+100D`   | Letter           | CONSONANT         | _null_                     | &#x100D; Dda                 |
|`U+100E`   | Letter           | CONSONANT         | _null_                     | &#x100E; Ddha                |
|`U+100F`   | Letter           | CONSONANT         | _null_                     | &#x100F; Nna                 |
| | | | |
|`U+1010`   | Letter           | CONSONANT         | _null_                     | &#x1010; Ta                  |
|`U+1011`   | Letter           | CONSONANT         | _null_                     | &#x1011; Tha                 |
|`U+1012`   | Letter           | CONSONANT         | _null_                     | &#x1012; Da                  |
|`U+1013`   | Letter           | CONSONANT         | _null_                     | &#x1013; Dha                 |
|`U+1014`   | Letter           | CONSONANT         | _null_                     | &#x1014; Na                  |
|`U+1015`   | Letter           | CONSONANT         | _null_                     | &#x1015; Pa                  |
|`U+1016`   | Letter           | CONSONANT         | _null_                     | &#x1016; Pha                 |
|`U+1017`   | Letter           | CONSONANT         | _null_                     | &#x1017; Ba                  |
|`U+1018`   | Letter           | CONSONANT         | _null_                     | &#x1018; Bha                 |
|`U+1019`   | Letter           | CONSONANT         | _null_                     | &#x1019; Ma                  |
|`U+101A`   | Letter           | CONSONANT         | _null_                     | &#x101A; Ya                  |
|`U+101B`   | Letter           | CONSONANT         | _null_                     | &#x101B; Ra                  |
|`U+101C`   | Letter           | CONSONANT         | _null_                     | &#x101C; La                  |
|`U+101D`   | Letter           | CONSONANT         | _null_                     | &#x101D; Wa                  |
|`U+101E`   | Letter           | CONSONANT         | _null_                     | &#x101E; Sa                  |
|`U+101F`   | Letter           | CONSONANT         | _null_                     | &#x101F; Ha                  |
| | | | |
|`U+1020`   | Letter           | CONSONANT         | _null_                     | &#x1020; Lla                 |
|`U+1021`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x1021; A                   |
|`U+1022`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x1022; Shan A              |
|`U+1023`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x1023; I                   |
|`U+1024`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x1024; Ii                  |
|`U+1025`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x1025; U                   |
|`U+1026`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x1026; Uu                  |
|`U+1027`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x1027; E                   |
|`U+1028`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x1028; Mon E               |
|`U+1029`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x1029; O                   |
|`U+102A`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x102A; Au                  |
|`U+102B`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x102B; Sign Tall Aa        |
|`U+102C`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x102C; Sign Aa             |
|`U+102D`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x102D; Sign I              |
|`U+102E`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x102E; Sign Ii             |
|`U+102F`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x102F; Sign U              |
| | | | |
|`U+1030`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x1030; Sign Uu             |
|`U+1031`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x1031; Sign E              |
|`U+1032`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x1032; Sign Ai             |
|`U+1033`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x1033; Sign Mon Ii         |
|`U+1034`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x1034; Sign Mon O          |
|`U+1035`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x1035; Sign E Above        |
|`U+1036`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x1036; Anusvara            |
|`U+1037`   | Mark [Mn]        | TONE_MARKER       | BOTTOM_POSITION            | &#x1037; Dot Below           |
|`U+1038`   | Mark [Mc]        | VISARGA           | RIGHT_POSITION             | &#x1038; Visarga             |
|`U+1039`   | Mark [Mn]        | INVISIBLE_STACKER | _null_                     | &#x1039; Virama              |
|`U+103A`   | Mark [Mn]        | PURE_KILLER       | TOP_POSITION               | &#x103A; Asat                |
|`U+103B`   | Mark [Mc]        | CONSONANT_MEDIAL  | RIGHT_POSITION             | &#x103B; Sign Medial Ya      |
|`U+103C`   | Mark [Mc]        | CONSONANT_MEDIAL  | TOP_LEFT_AND_BOTTOM_POSITION | &#x103C; Sign Medial Ra      |
|`U+103D`   | Mark [Mn]        | CONSONANT_MEDIAL  | BOTTOM_POSITION            | &#x103D; Sign Medial Wa      |
|`U+103E`   | Mark [Mn]        | CONSONANT_MEDIAL  | BOTTOM_POSITION            | &#x103E; Sign Medial Ha      |
|`U+103F`   | Letter           | CONSONANT         | _null_                     | &#x103F; Great Sa            |
| | | | |
|`U+1040`   | Number           | NUMBER            | _null_                     | &#x1040; Digit Zero          |
|`U+1041`   | Number           | NUMBER            | _null_                     | &#x1041; Digit One           |
|`U+1042`   | Number           | NUMBER            | _null_                     | &#x1042; Digit Two           |
|`U+1043`   | Number           | NUMBER            | _null_                     | &#x1043; Digit Three         |
|`U+1044`   | Number           | NUMBER            | _null_                     | &#x1044; Digit Four          |
|`U+1045`   | Number           | NUMBER            | _null_                     | &#x1045; Digit Five          |
|`U+1046`   | Number           | NUMBER            | _null_                     | &#x1046; Digit Six           |
|`U+1047`   | Number           | NUMBER            | _null_                     | &#x1047; Digit Seven         |
|`U+1048`   | Number           | NUMBER            | _null_                     | &#x1048; Digit Eight         |
|`U+1049`   | Number           | NUMBER            | _null_                     | &#x1049; Digit Nine          |
|`U+104A`   | Punctuation      | _null_            | _null_                     | &#x104A; Little Section      |
|`U+104B`   | Punctuation      | _null_            | _null_                     | &#x104B; Section             |
|`U+104C`   | Punctuation      | _null_            | _null_                     | &#x104C; Locative            |
|`U+104D`   | Punctuation      | _null_            | _null_                     | &#x104D; Completed           |
|`U+104E`   | Punctuation      | CONSONANT_PLACEHOLDER| _null_                  | &#x104E; Aforementioned      |
|`U+104F`   | Punctuation      | _null_            | _null_                     | &#x104F; Genitive            |
| | | | |
|`U+1050`   | Letter           | CONSONANT         | _null_                     | &#x1050; Sha                 |
|`U+1051`   | Letter           | CONSONANT         | _null_                     | &#x1051; Ssa                 |
|`U+1052`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x1052; Vocalic R           |
|`U+1053`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x1053; Vocalic Rr          |
|`U+1054`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x1054; Vocalic L           |
|`U+1055`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x1055; Vocalic Ll          |
|`U+1056`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x1056; Sign Vocalic R      |
|`U+1057`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x1057; Sign Vocalic Rr     |
|`U+1058`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x1058; Sign Vocalic L      |
|`U+1059`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x1059; Sign Vocalic Ll     |
|`U+105A`   | Letter           | CONSONANT         | _null_                     | &#x105A; Mon Nga             |
|`U+105B`   | Letter           | CONSONANT         | _null_                     | &#x105B; Mon Jha             |
|`U+105C`   | Letter           | CONSONANT         | _null_                     | &#x105C; Mon Bba             |
|`U+105D`   | Letter           | CONSONANT         | _null_                     | &#x105D; Mon Bbe             |
|`U+105E`   | Mark [Mn]        | CONSONANT_MEDIAL  | BOTTOM_POSITION            | &#x105E; Sign Mon Medial Na  |
|`U+105F`   | Mark [Mn]        | CONSONANT_MEDIAL  | BOTTOM_POSITION            | &#x105F; Sign Mon Medial Ma  |
| | | | |
|`U+1060`   | Mark [Mn]        | CONSONANT_MEDIAL  | BOTTOM_POSITION            | &#x1060; Sign Mon Medial La  |
|`U+1061`   | Letter           | CONSONANT         | _null_                     | &#x1061; Sgaw Karen Sha      |
|`U+1062`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x1062; Sign Sgaw Karen Eu  |
|`U+1063`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x1063; Tone Sgaw Karen Hathi|
|`U+1064`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x1064; Tone Sgaw Karen Ke Pho|
|`U+1065`   | Letter           | CONSONANT         | _null_                     | &#x1065; Western Pwo Karen Tha|
|`U+1066`   | Letter           | CONSONANT         | _null_                     | &#x1066; Western Pwo Karen Pwa|
|`U+1067`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x1067; Sign Western Pwo Karen Eu|
|`U+1068`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x1068; Sign Western Pwo Karen Ue|
|`U+1069`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x1069; Sign Western Pwo Karen Tone 1|
|`U+106A`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x106A; Sign Western Pwo Karen Tone 2|
|`U+106B`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x106B; Sign Western Pwo Karen Tone 3|
|`U+106C`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x106C; Sign Western Pwo Karen Tone 4|
|`U+106D`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x106D; Sign Western Pwo Karen Tone 5|
|`U+106E`   | Letter           | CONSONANT         | _null_                     | &#x106E; Eastern Pwo Karen Nna|
|`U+106F`   | Letter           | CONSONANT         | _null_                     | &#x106F; Eastern Pwo Karen Ywa|
| | | | |
|`U+1070`   | Letter           | CONSONANT         | _null_                     | &#x1070; Eastern Pwo Karen Ghwa|
|`U+1071`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x1071; Sign Geba Karen I   |
|`U+1072`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x1072; Sign Kayah Oe       |
|`U+1073`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x1073; Sign Kayah U        |
|`U+1074`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x1074; Sign Kayah Ee       |
|`U+1075`   | Letter           | CONSONANT         | _null_                     | &#x1075; Shan Ka             |
|`U+1076`   | Letter           | CONSONANT         | _null_                     | &#x1076; Shan Kha            |
|`U+1077`   | Letter           | CONSONANT         | _null_                     | &#x1077; Shan Ga             |
|`U+1078`   | Letter           | CONSONANT         | _null_                     | &#x1078; Shan Ca             |
|`U+1079`   | Letter           | CONSONANT         | _null_                     | &#x1079; Shan Za             |
|`U+107A`   | Letter           | CONSONANT         | _null_                     | &#x107A; Shan Nya            |
|`U+107B`   | Letter           | CONSONANT         | _null_                     | &#x107B; Shan Da             |
|`U+107C`   | Letter           | CONSONANT         | _null_                     | &#x107C; Shan Na             |
|`U+107D`   | Letter           | CONSONANT         | _null_                     | &#x107D; Shan Pha            |
|`U+107E`   | Letter           | CONSONANT         | _null_                     | &#x107E; Shan Fa             |
|`U+107F`   | Letter           | CONSONANT         | _null_                     | &#x107F; Shan Ba             |
| | | | |
|`U+1080`   | Letter           | CONSONANT         | _null_                     | &#x1080; Shan Tha            |
|`U+1081`   | Letter           | CONSONANT         | _null_                     | &#x1081; Shan Ha             |
|`U+1082`   | Mark [Mn]        | CONSONANT_MEDIAL  | BOTTOM_POSITION            | &#x1082; Sign Shan Medial Wa |
|`U+1083`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x1083; Sign Shan Aa        |
|`U+1084`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x1084; Sign Shan E         |
|`U+1085`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x1085; Sign Shan E Above   |
|`U+1086`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x1086; Sign Shan Final Y   |
|`U+1087`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x1087; Sign Shan Tone 2    |
|`U+1088`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x1088; Sign Shan Tone 3    |
|`U+1089`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x1089; Sign Shan Tone 5    |
|`U+108A`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x108A; Sign Shan Tone 6    |
|`U+108B`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x108B; Sign Shan Council Tone 2|
|`U+108C`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x108C; Sign Shan Council Tone 3|
|`U+108D`   | Mark [Mn]        | TONE_MARKER       | BOTTOM_POSITION            | &#x108D; Sign Shan Council Emphatic Tone|
|`U+108E`   | Letter           | CONSONANT         | _null_                     | &#x108E; Rumai Palaung Fa    |
|`U+108F`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x108F; Sign Rumai Palaung Tone 5|
| | | | |
|`U+1090`   | Number           | NUMBER            | _null_                     | &#x1090; Shan Digit Zero     |
|`U+1091`   | Number           | NUMBER            | _null_                     | &#x1091; Shan Digit One      |
|`U+1092`   | Number           | NUMBER            | _null_                     | &#x1092; Shan Digit Two      |
|`U+1093`   | Number           | NUMBER            | _null_                     | &#x1093; Shan Digit Three    |
|`U+1094`   | Number           | NUMBER            | _null_                     | &#x1094; Shan Digit Four     |
|`U+1095`   | Number           | NUMBER            | _null_                     | &#x1095; Shan Digit Five     |
|`U+1096`   | Number           | NUMBER            | _null_                     | &#x1096; Shan Digit Six      |
|`U+1097`   | Number           | NUMBER            | _null_                     | &#x1097; Shan Digit Seven    |
|`U+1098`   | Number           | NUMBER            | _null_                     | &#x1098; Shan Digit Eight    |
|`U+1099`   | Number           | NUMBER            | _null_                     | &#x1099; Shan Digit Nine     |
|`U+109A`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x109A; Sign Khamti Tone 1  |
|`U+109B`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#x109B; Sign Khamti Tone 3  |
|`U+109C`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x109C; Sign Aiton A        |
|`U+109D`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x109D; Sign Aiton Ai       |
|`U+109E`   | Symbol           | SYMBOL            | _null_                     | &#x109E; Shan One            |
|`U+109F`   | Symbol           | SYMBOL            | _null_                     | &#x109F; Shan Exclamation    |
:::


## Myanmar Extended character tables ##

### Myanmar Extended A character table ###


:::{table} Myanmar Extended-A character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+AA60`   | Letter           | CONSONANT         | _null_                     | &#xAA60; Khamti Ga           |
|`U+AA61`   | Letter           | CONSONANT         | _null_                     | &#xAA61; Khamti Ca           |
|`U+AA62`   | Letter           | CONSONANT         | _null_                     | &#xAA62; Khamti Cha          |
|`U+AA63`   | Letter           | CONSONANT         | _null_                     | &#xAA63; Khamti Ja           |
|`U+AA64`   | Letter           | CONSONANT         | _null_                     | &#xAA64; Khamti Jha          |
|`U+AA65`   | Letter           | CONSONANT         | _null_                     | &#xAA65; Khamti Nya          |
|`U+AA66`   | Letter           | CONSONANT         | _null_                     | &#xAA66; Khamti Tta          |
|`U+AA67`   | Letter           | CONSONANT         | _null_                     | &#xAA67; Khamti Ttha         |
|`U+AA68`   | Letter           | CONSONANT         | _null_                     | &#xAA68; Khamti Dda          |
|`U+AA69`   | Letter           | CONSONANT         | _null_                     | &#xAA69; Khamti Ddha         |
|`U+AA6A`   | Letter           | CONSONANT         | _null_                     | &#xAA6A; Khamti Dha          |
|`U+AA6B`   | Letter           | CONSONANT         | _null_                     | &#xAA6B; Khamti Na           |
|`U+AA6C`   | Letter           | CONSONANT         | _null_                     | &#xAA6C; Khamti Sa           |
|`U+AA6D`   | Letter           | CONSONANT         | _null_                     | &#xAA6D; Khamti Ha           |
|`U+AA6E`   | Letter           | CONSONANT         | _null_                     | &#xAA6E; Khamti Hha          |
|`U+AA6F`   | Letter           | CONSONANT         | _null_                     | &#xAA6F; Khamti Fa           |
| | | | |
|`U+AA70`   | Letter           | _null_            | _null_                     | &#xAA70; Khamti Reduplication|
|`U+AA71`   | Letter           | CONSONANT         | _null_                     | &#xAA71; Khamti Xa           |
|`U+AA72`   | Letter           | CONSONANT         | _null_                     | &#xAA72; Khamti Za           |
|`U+AA73`   | Letter           | CONSONANT         | _null_                     | &#xAA73; Khamti Ra           |
|`U+AA74`   | Letter           | CONSONANT_PLACEHOLDER| _null_                  | &#xAA74; Khamti Oay          |
|`U+AA75`   | Letter           | CONSONANT_PLACEHOLDER| _null_                  | &#xAA75; Khamti Qn           |
|`U+AA76`   | Letter           | CONSONANT_PLACEHOLDER| _null_                  | &#xAA76; Khamti Hm           |
|`U+AA77`   | Symbol           | SYMBOL            | _null_                     | &#xAA77; Aiton Exclamation   |
|`U+AA78`   | Symbol           | SYMBOL            | _null_                     | &#xAA78; Aiton One           |
|`U+AA79`   | Symbol           | SYMBOL            | _null_                     | &#xAA79; Aiton Two           |
|`U+AA7A`   | Letter           | CONSONANT         | _null_                     | &#xAA7A; Aiton Ra            |
|`U+AA7B`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#xAA7B; Sign Pao Karen Tone |
|`U+AA7C`   | Mark [Mn]        | TONE_MARKER       | TOP_POSITION               | &#xAA7C; Sign Tai Laing Tone 2|
|`U+AA7D`   | Mark [Mc]        | TONE_MARKER       | RIGHT_POSITION             | &#xAA7D; Sign Tai Laing Tone 5|
|`U+AA7E`   | Letter           | CONSONANT         | _null_                     | &#xAA7E; Shwe Palaung Cha    |
|`U+AA7F`   | Letter           | CONSONANT         | _null_                     | &#xAA7F; Shwe Palaung Sha    |
:::


### Myanmar Extended B character table ###


:::{table} Myanmar Extended-B character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+A9E0`   | Letter           | CONSONANT         | _null_                     | &#xA9E0; Shan Gha            |
|`U+A9E1`   | Letter           | CONSONANT         | _null_                     | &#xA9E1; Shan Cha            |
|`U+A9E2`   | Letter           | CONSONANT         | _null_                     | &#xA9E2; Shan Jha            |
|`U+A9E3`   | Letter           | CONSONANT         | _null_                     | &#xA9E3; Shan Nna            |
|`U+A9E4`   | Letter           | CONSONANT         | _null_                     | &#xA9E4; Shan Bha            |
|`U+A9E5`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#xA9E5; Sign Shan Saw       |
|`U+A9E6`   | Letter           | _null_            | _null_                     | &#xA9E6; Shan Reduplication  |
|`U+A9E7`   | Letter           | CONSONANT         | _null_                     | &#xA9E7; Tai Laing Nya       |
|`U+A9E8`   | Letter           | CONSONANT         | _null_                     | &#xA9E8; Tai Laing Fa        |
|`U+A9E9`   | Letter           | CONSONANT         | _null_                     | &#xA9E9; Tai Laing Ga        |
|`U+A9EA`   | Letter           | CONSONANT         | _null_                     | &#xA9EA; Tai Laing Gha       |
|`U+A9EB`   | Letter           | CONSONANT         | _null_                     | &#xA9EB; Tai Laing Ja        |
|`U+A9EC`   | Letter           | CONSONANT         | _null_                     | &#xA9EC; Tai Laing Jha       |
|`U+A9ED`   | Letter           | CONSONANT         | _null_                     | &#xA9ED; Tai Laing Dda       |
|`U+A9EE`   | Letter           | CONSONANT         | _null_                     | &#xA9EE; Tai Laing Ddha      |
|`U+A9EF`   | Letter           | CONSONANT         | _null_                     | &#xA9EF; Tai Laing Nna       |
| | | | |
|`U+A9F0`   | Number           | NUMBER            | _null_                     | &#xA9F0; Tai Laing Digit Zero|
|`U+A9F1`   | Number           | NUMBER            | _null_                     | &#xA9F1; Tai Laing Digit One |
|`U+A9F2`   | Number           | NUMBER            | _null_                     | &#xA9F2; Tai Laing Digit Two |
|`U+A9F3`   | Number           | NUMBER            | _null_                     | &#xA9F3; Tai Laing Digit Three|
|`U+A9F4`   | Number           | NUMBER            | _null_                     | &#xA9F4; Tai Laing Digit Four|
|`U+A9F5`   | Number           | NUMBER            | _null_                     | &#xA9F5; Tai Laing Digit Five|
|`U+A9F6`   | Number           | NUMBER            | _null_                     | &#xA9F6; Tai Laing Digit Six |
|`U+A9F7`   | Number           | NUMBER            | _null_                     | &#xA9F7; Tai Laing Digit Seven|
|`U+A9F8`   | Number           | NUMBER            | _null_                     | &#xA9F8; Tai Laing Digit Eight|
|`U+A9F9`   | Number           | NUMBER            | _null_                     | &#xA9F9; Tai Laing Digit Nine|
|`U+A9FA`   | Letter           | CONSONANT         | _null_                     | &#xA9FA; Tai Laing Lla       |
|`U+A9FB`   | Letter           | CONSONANT         | _null_                     | &#xA9FB; Tai Laing Da        |
|`U+A9FC`   | Letter           | CONSONANT         | _null_                     | &#xA9FC; Tai Laing Dha       |
|`U+A9FD`   | Letter           | CONSONANT         | _null_                     | &#xA9FD; Tai Laing Ba        |
|`U+A9FE`   | Letter           | CONSONANT         | _null_                     | &#xA9FE; Tai Laing Bha       |
|`U+A9FF`   | _unassigned_     |                   |                            |                              |
:::


### Myanmar Extended C character table ###


:::{table} Myanmar Extended-C character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+116D0`  | Number           | NUMBER            | _null_                     | &#x116D0; Pao Digit Zero     |
|`U+116D1`  | Number           | NUMBER            | _null_                     | &#x116D1; Pao Digit One      |
|`U+116D2`  | Number           | NUMBER            | _null_                     | &#x116D2; Pao Digit Two      |
|`U+116D3`  | Number           | NUMBER            | _null_                     | &#x116D3; Pao Digit Three    |
|`U+116D4`  | Number           | NUMBER            | _null_                     | &#x116D4; Pao Digit Four     |
|`U+116D5`  | Number           | NUMBER            | _null_                     | &#x116D5; Pao Digit Five     |
|`U+116D6`  | Number           | NUMBER            | _null_                     | &#x116D6; Pao Digit Six      |
|`U+116D7`  | Number           | NUMBER            | _null_                     | &#x116D7; Pao Digit Seven    |
|`U+116D8`  | Number           | NUMBER            | _null_                     | &#x116D8; Pao Digit Eight    |
|`U+116D9`  | Number           | NUMBER            | _null_                     | &#x116D9; Pao Digit Nine     |
|`U+116DA`  | Number           | NUMBER            | _null_                     | &#x116DA; Eastern Pwo Karen Digit Zero|
|`U+116DB`  | Number           | NUMBER            | _null_                     | &#x116DB; Eastern Pwo Karen Digit One|
|`U+116DC`  | Number           | NUMBER            | _null_                     | &#x116DC; Eastern Pwo Karen Digit Two|
|`U+116DD`  | Number           | NUMBER            | _null_                     | &#x116DD; Eastern Pwo Karen Digit Three|
|`U+116DE`  | Number           | NUMBER            | _null_                     | &#x116DE; Eastern Pwo Karen Digit Four|
|`U+116DF`  | Number           | NUMBER            | _null_                     | &#x116DF; Eastern Pwo Karen Digit Five|
| | | | |
|`U+116E0`  | Number           | NUMBER            | _null_                     | &#x116E0; Eastern Pwo Karen Digit Six|
|`U+116E1`  | Number           | NUMBER            | _null_                     | &#x116E1; Eastern Pwo Karen Digit Seven|
|`U+116E2`  | Number           | NUMBER            | _null_                     | &#x116E2; Eastern Pwo Karen Digit Eight|
|`U+116E3`  | Number           | NUMBER            | _null_                     | &#x116E3; Eastern Pwo Karen Digit Nine|
|`U+116E4`  | _unassigned_     |                   |                            |                              |
|`U+116E5`  | _unassigned_     |                   |                            |                              |
|`U+116E6`  | _unassigned_     |                   |                            |                              |
|`U+116E7`  | _unassigned_     |                   |                            |                              |
|`U+116E8`  | _unassigned_     |                   |                            |                              |
|`U+116E9`  | _unassigned_     |                   |                            |                              |
|`U+116EA`  | _unassigned_     |                   |                            |                              |
|`U+116EB`  | _unassigned_     |                   |                            |                              |
|`U+116EC`  | _unassigned_     |                   |                            |                              |
|`U+116ED`  | _unassigned_     |                   |                            |                              |
|`U+116EE`  | _unassigned_     |                   |                            |                              |
|`U+116EF`  | _unassigned_     |                   |                            |                              |
| | | | |
|`U+116F0`  | _unassigned_     |                   |                            |                              |
|`U+116F1`  | _unassigned_     |                   |                            |                              |
|`U+116F2`  | _unassigned_     |                   |                            |                              |
|`U+116F3`  | _unassigned_     |                   |                            |                              |
|`U+116F4`  | _unassigned_     |                   |                            |                              |
|`U+116F5`  | _unassigned_     |                   |                            |                              |
|`U+116F6`  | _unassigned_     |                   |                            |                              |
|`U+116F7`  | _unassigned_     |                   |                            |                              |
|`U+116F8`  | _unassigned_     |                   |                            |                              |
|`U+116F9`  | _unassigned_     |                   |                            |                              |
|`U+116FA`  | _unassigned_     |                   |                            |                              |
|`U+116FB`  | _unassigned_     |                   |                            |                              |
|`U+116FC`  | _unassigned_     |                   |                            |                              |
|`U+116FD`  | _unassigned_     |                   |                            |                              |
|`U+116FE`  | _unassigned_     |                   |                            |                              |
|`U+116FF`  | _unassigned_     |                   |                            |                              |
:::


## Vedic Extensions character table ##

Sanskrit runs written in the Myanmar script may also include
characters from the Vedic Extensions block. These characters should be
classified as follows.

> Note: See the [Vedic Extensions](../opentype-shaping-vedic-extensions.md) 
> document for additional information.


:::{table} Vedic Extensions character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1CD0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD0; Tone Karshana       |
|`U+1CD1`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD1; Tone Shara          |
|`U+1CD2`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD2; Tone Prenkha        |
|`U+1CD3`   | Punctuation      | _null_            | _null_                     | &#x1CD3; Sign Nihshvasa      |
|`U+1CD4`   | Mark [Mn]        | CANTILLATION      | OVERSTRUCK                 | &#x1CD4; Tone Midline Svarita |
|`U+1CD5`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD5; Tone Aggravated Independent Svarita |
|`U+1CD6`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD6; Tone Independent Svarita |
|`U+1CD7`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD7; Tone Kathaka Independent Svarita |
|`U+1CD8`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD8; Tone Candra Below   |
|`U+1CD9`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD9; Tone Kathaka Independent Svarita Schroeder |
|`U+1CDA`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDA; Tone Double Svarita |
|`U+1CDB`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDB; Tone Triple Svarita |
|`U+1CDC`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDC; Tone Kathaka Anudatta |
|`U+1CDD`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDD; Tone Dot Below      |
|`U+1CDE`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDE; Tone Two Dots Below |
|`U+1CDF`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDF; Tone Three Dots Below |
| | | | |																		
|`U+1CE0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CE0; Tone Rigvedic Kashmiri Independent Svarita |
|`U+1CE1`   | Mark [Mc]        | CANTILLATION      | RIGHT_POSITION             | &#x1CE1; Tone Atharvavedic Independent Svarita |
|`U+1CE2`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE2; Sign Visarga Svarita |
|`U+1CE3`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE3; Sign Visarga Udatta |
|`U+1CE4`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE4; Sign Reversed Visarga Udatta |
|`U+1CE5`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE5; Sign Visarga Anudatta |
|`U+1CE6`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE6; Sign Reversed Visarga Anudatta |
|`U+1CE7`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE7; Sign Visarga Udatta With Tail |
|`U+1CE8`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE8; Sign Visarga Anudatta With Tail |
|`U+1CE9`   | Letter           | SYMBOL            | _null_                     | &#x1CE9; Sign Anusvara Antargomukha |
|`U+1CEA`   | Letter           | _null_            | _null_                     | &#x1CEA; Sign Anusvara Bahirgomukha |
|`U+1CEB`   | Letter           | _null_            | _null_                     | &#x1CEB; Sign Anusvara Vamagomukha |
|`U+1CEC`   | Letter           | SYMBOL            | _null_                     | &#x1CEC; Sign Anusvara Vamagomukha With Tail |
|`U+1CED`   | Mark [Mn]        | AVAGRAHA          | BOTTOM_POSITION            | &#x1CED; Sign Tiryak         |
|`U+1CEE`   | Letter           | SYMBOL            | _null_                     | &#x1CEE; Sign Hexiform Long Anusvara |
|`U+1CEF`   | Letter           | _null_            | _null_                     | &#x1CEF; Sign Long Anusvara  |
| | | | |																		
|`U+1CF0`   | Letter           | _null_            | _null_                     | &#x1CF0; Sign Rthang Long Anusvara |
|`U+1CF1`   | Letter           | SYMBOL            | _null_                     | &#x1CF1; Sign Anusvara Ubhayato Mukha |
|`U+1CF2`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF2; Sign Ardhavisarga   |
|`U+1CF3`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF3; Sign Rotated Ardhavisarga |
|`U+1CF4`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CF4; Tone Candra Above   |
|`U+1CF5`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF5; Sign Jihvamuliya    |
|`U+1CF6`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF6; Sign Upadhmaniya    |
|`U+1CF7`   | Mark [Mc]        | _null_            | _null_                     | &#x1CF7; Sign Atikrama       |
|`U+1CF8`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF8; Tone Ring Above     |
|`U+1CF9`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF9; Tone Double Ring Above |
|`U+1CFA`   | Letter           | PLACEHOLDER       | _null_                     | &#x1CFA; Sign Double Anusvara Antargomukha |
|`U+1CFB`   | _unassigned_     |                   |                            |                              |
|`U+1CFC`   | _unassigned_     |                   |                            |                              |
|`U+1CFD`   | _unassigned_     |                   |                            |                              |
|`U+1CFE`   | _unassigned_     |                   |                            |                              |
|`U+1CFF`   | _unassigned_     |                   |                            |                              |
:::


## Miscellaneous character table ##

Other important characters that may be encountered when shaping runs
of Myanmar text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2013; En dash               |
|`U+2014`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
:::


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation of a conjunct
from a "_Consonant_,Halant,_Consonant_" sequence. The sequence
"_Consonant_,Halant,ZWJ,_Consonant_" blocks the formation of a
conjunct between the two consonants. 

Note, however, that the "_Consonant_,Halant" subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead. The sequence
"_Consonant_,Halant,ZWNJ,_Consonant_" should produce the first
consonant in its standard form, followed by an explicit "Halant".

A secondary usage of the zero-width joiner is to prevent the formation of
"Reph". An initial "Ra,Halant,ZWJ" sequence should not produce a "Reph",
where an initial "Ra,Halant" sequence without the zero-width joiner
otherwise would.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display
those codepoints that are defined as non-spacing (marks, dependent
vowels (matras), below-base consonant forms, and post-base consonant
forms) in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match "NBSP,ZWJ,Halant,_Consonant_", "NBSP,_mark_", or "NBSP,_matra_".


================================================
FILE: character-tables/character-tables-nko.md
================================================
# N'Ko character tables #

This document lists the per-character shaping information needed to
[shape N'Ko text](../opentype-shaping-nko.md).

**Contents**

  - [NKo character table](#nko-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)


## NKo character table ##

N'Ko glyphs should be classified as in the following
table. Codepoints in the NKo block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column.

The _Joining type_ column indicates whether each codepoint is defined
as joining with adjacent characters on the left side, right side, left
and right sides ("DUAL"), or neither side ("NON_JOINING"). Codepoints
designated TRANSPARENT in the _Joining type_ column do not join with
adjacent characters and, in addition, do not affect the joining
behavior of surrounding characters. Non-spacing marks are of type
TRANSPARENT. Codepoints designated JOIN_CAUSING force adjacent
characters to join.

The _Joining group_ column lists the fundamental letter that the
listed codepoint behaves like for joining purposes.

Assigned codepoints with a _null_ in the _Joining group_
column evoke no special behavior from the shaping engine during the
join-computation stage.

> Note: No codepoints in the NKo block are assigned a non-null _Joining group_.

The _Mark class_ column indicates the Canonical Combining Class
for the codepoint.  Marks are assigned non-zero combining classes so
that sequences of adjacent marks can be reordered as required by the
orthography. 


:::{table} N'Ko character table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                                         |
|:----------|:-----------------|:-------------|:---------------------|:-----------|-----------------------------------------------|
|`U+07C0`   | Number           | NON_JOINING  | _null_               | _0_        | &#x07C0; Digit Zero                           |
|`U+07C1`   | Number           | NON_JOINING  | _null_               | _0_        | &#x07C1; Digit One                            |
|`U+07C2`   | Number           | NON_JOINING  | _null_               | _0_        | &#x07C2; Digit Two                            |
|`U+07C3`   | Number           | NON_JOINING  | _null_               | _0_        | &#x07C3; Digit Three                          |
|`U+07C4`   | Number           | NON_JOINING  | _null_               | _0_        | &#x07C4; Digit Four                           |
|`U+07C5`   | Number           | NON_JOINING  | _null_               | _0_        | &#x07C5; Digit Five                           |
|`U+07C6`   | Number           | NON_JOINING  | _null_               | _0_        | &#x07C6; Digit Six                            |
|`U+07C7`   | Number           | NON_JOINING  | _null_               | _0_        | &#x07C7; Digit Seven                          |
|`U+07C8`   | Number           | NON_JOINING  | _null_               | _0_        | &#x07C8; Digit Eight                          |
|`U+07C9`   | Number           | NON_JOINING  | _null_               | _0_        | &#x07C9; Digit Nine                           |
|`U+07CA`   | Letter           | DUAL         | _null_               | _0_        | &#x07CA; A                                    |
|`U+07CB`   | Letter           | DUAL         | _null_               | _0_        | &#x07CB; Ee                                   |
|`U+07CC`   | Letter           | DUAL         | _null_               | _0_        | &#x07CC; I                                    |
|`U+07CD`   | Letter           | DUAL         | _null_               | _0_        | &#x07CD; E                                    |
|`U+07CE`   | Letter           | DUAL         | _null_               | _0_        | &#x07CE; U                                    |
|`U+07CF`   | Letter           | DUAL         | _null_               | _0_        | &#x07CF; Oo                                   |
| | | | | |                                                                                                      			      
|`U+07D0`   | Letter           | DUAL         | _null_               | _0_        | &#x07D0; O                                    |
|`U+07D1`   | Letter           | DUAL         | _null_               | _0_        | &#x07D1; Dagbasinna                           |
|`U+07D2`   | Letter           | DUAL         | _null_               | _0_        | &#x07D2; N                                    |
|`U+07D3`   | Letter           | DUAL         | _null_               | _0_        | &#x07D3; Ba                                   |
|`U+07D4`   | Letter           | DUAL         | _null_               | _0_        | &#x07D4; Pa                                   |
|`U+07D5`   | Letter           | DUAL         | _null_               | _0_        | &#x07D5; Ta                                   |
|`U+07D6`   | Letter           | DUAL         | _null_               | _0_        | &#x07D6; Ja                                   |
|`U+07D7`   | Letter           | DUAL         | _null_               | _0_        | &#x07D7; Cha                                  |
|`U+07D8`   | Letter           | DUAL         | _null_               | _0_        | &#x07D8; Da                                   |
|`U+07D9`   | Letter           | DUAL         | _null_               | _0_        | &#x07D9; Ra                                   |
|`U+07DA`   | Letter           | DUAL         | _null_               | _0_        | &#x07DA; Rra                                  |
|`U+07DB`   | Letter           | DUAL         | _null_               | _0_        | &#x07DB; Sa                                   |
|`U+07DC`   | Letter           | DUAL         | _null_               | _0_        | &#x07DC; Gba                                  |
|`U+07DD`   | Letter           | DUAL         | _null_               | _0_        | &#x07DD; Fa                                   |
|`U+07DE`   | Letter           | DUAL         | _null_               | _0_        | &#x07DE; Ka                                   |
|`U+07DF`   | Letter           | DUAL         | _null_               | _0_        | &#x07DF; La                                   |
| | | | | |                                                                                                      			      
|`U+07E0`   | Letter           | DUAL         | _null_               | _0_        | &#x07E0; Na Woloso                            |
|`U+07E1`   | Letter           | DUAL         | _null_               | _0_        | &#x07E1; Ma                                   |
|`U+07E2`   | Letter           | DUAL         | _null_               | _0_        | &#x07E2; Nya                                  |
|`U+07E3`   | Letter           | DUAL         | _null_               | _0_        | &#x07E3; Na                                   |
|`U+07E4`   | Letter           | DUAL         | _null_               | _0_        | &#x07E4; Ha                                   |
|`U+07E5`   | Letter           | DUAL         | _null_               | _0_        | &#x07E5; Wa                                   |
|`U+07E6`   | Letter           | DUAL         | _null_               | _0_        | &#x07E6; Ya                                   |
|`U+07E7`   | Letter           | DUAL         | _null_               | _0_        | &#x07E7; Nya Woloso                           |
|`U+07E8`   | Letter           | DUAL         | _null_               | _0_        | &#x07E8; Jona Ja                              |
|`U+07E9`   | Letter           | DUAL         | _null_               | _0_        | &#x07E9; Jona Cha                             |
|`U+07EA`   | Letter           | DUAL         | _null_               | _0_        | &#x07EA; Jona Ra                              |
|`U+07EB`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x07EB; Combining  Short High Tone           |
|`U+07EC`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x07EC; Combining  Short Low Tone            |
|`U+07ED`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x07ED; Combining  Short Rising Tone         |
|`U+07EE`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x07EE; Combining  Long Descending Tone      |
|`U+07EF`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x07EF; Combining  Long High Tone            |
| | | | | |                                                                                                     			      
|`U+07F0`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x07F0; Combining  Long Low Tone             |
|`U+07F1`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x07F1; Combining  Long Rising Tone          |
|`U+07F2`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x07F2; Combining  Nasalization Mark         |
|`U+07F3`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x07F3; Combining  Double Dot Above          |
|`U+07F4`   | Letter modifier  | NON_JOINING  | _null_               | _0_        | &#x07F4; High Tone Apostrophe                 |
|`U+07F5`   | Letter modifier  | NON_JOINING  | _null_               | _0_        | &#x07F5; Low Tone Apostrophe                  |
|`U+07F6`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x07F6; Symbol Oo Dennen                     |
|`U+07F7`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x07F7; Symbol Gbakurunen                    |
|`U+07F8`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x07F8; Comma                                |
|`U+07F9`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x07F9; Exclamation Mark                     |
|`U+07FA`   | Letter modifier  | JOIN_CAUSING | _null_               | _0_        | &#x07FA; Lajanyalan                           |
|`U+07FB`   | _unassigned_     |              |                      |            |                                               |
|`U+07FC`   | _unassigned_     |              |                      |            |                                               |
|`U+07FD`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x07FD; Dantayalan                           |
|`U+07FE`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x07FE; Dorome Sign                          |
|`U+07FF`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x07FF; Taman Sign                           |
:::


## Miscellaneous character table ##

Other important characters that may be encountered when shaping runs
of N'Ko text include the dotted-circle placeholder (`U+25CC`), the
combining grapheme joiner (`U+034F`), the zero-width joiner (`U+200D`)
and zero-width non-joiner (`U+200C`), the left-to-right text marker
(`U+200E`) and right-to-left text marker (`U+200F`), and the no-break
space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
combining mark in isolation. Real-world text syllables may also use
other characters, such as hyphens or dashes, in a similar placeholder
fashion; shaping engines should cope with this situation gracefully.

:::{table} Miscellaneous character table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                          |
|:----------|:-----------------|:-------------|:---------------------|:-----------|--------------------------------|
|`U+00A0`   | Separator        | NON_JOINING  | _null_               | _0_        | &#x00A0; No-break space        |
|`U+034F`   | Other            | NON_JOINING  | _null_               | _0_        | &#x034F; Combining grapheme joiner |
|`U+200C`   | Other            | NON_JOINING  | _null_               | _0_        | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOIN_CAUSING | _null_               | _0_        | &#x200D; Zero-width joiner     |
|`U+200E`   | Other            | NON_JOINING  | _null_               | _0_        | &#x200E; Left-to-Right marker  |
|`U+200F`   | Other            | NON_JOINING  | _null_               | _0_        | &#x200F; Right-to-Left marker  |
|`U+2010`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2013; En dash               |
|`U+2014`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x25CC; Dotted circle         |
:::


The combining grapheme joiner (<abbr>CGJ</abbr>) is primarily used to alter the
order in which adjacent marks are positioned during the
mark-reordering stage, in order to adhere to the needs of a
non-default language orthography.
<!--- combining grapheme joiner explanation --->

The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to force the usage of the
cursive connecting form of a letter even when the context of the
adjoining letters would not trigger the connecting form. 

For example, to show the initial form of a letter in isolation (such
as for displaying it in a table of forms), the sequence "_Letter_,ZWJ"
would be used. To show the medial form of a letter in isolation, the
sequence "ZWJ,_Letter_,ZWJ" would be used.


<!--- Zero-Width Non Joiner explanation --->

The right-to-left mark (<abbr>RLM</abbr>) and left-to-right mark (<abbr>LRM</abbr>) are used by
the Unicode bidirectionality algorithm (BiDi) to indicate the points
in a text run at which the writing direction changes.


<!--- How shaping is affected by the <abbr title="Left-To-Right">LTR</abbr> and <abbr title="Right-To-Left">RTL</abbr> markers explanation --->


The no-break space (<abbr>NBSP</abbr>) is primarily used to display
those codepoints that are defined as non-spacing (such as the
combining tone marks, the nasalization mark, and other diacritical
marks) in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder.


================================================
FILE: character-tables/character-tables-oriya.md
================================================
# Oriya character tables #

This document lists the per-character shaping information needed to
[shape Oriya text](../opentype-shaping-oriya.md).

**Contents**

  - [Oriya character table](#oriya-character-table)
  - [Vedic Extensions character table](#vedic-extensions-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)
	  

## Oriya character table ##

Oriya glyphs should be classified as in the following
table. Codepoints in the Oriya block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as currency marks,
punctuation, and other symbols.

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


:::{table} Oriya character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0B00`   | _unassigned_     |                   |                            |                              |
|`U+0B01`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0B01; Candrabindu         |
|`U+0B02`   | Mark [Mc]        | BINDU             | RIGHT_POSITION             | &#x0B02; Anusvara            |
|`U+0B03`   | Mark [Mc]        | VISARGA           | RIGHT_POSITION             | &#x0B03; Visarga             |
|`U+0B04`   | _unassigned_     |                   |                            |                              |
|`U+0B05`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B05; A                   |
|`U+0B06`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B06; Aa                  |
|`U+0B07`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B07; I                   |
|`U+0B08`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B08; Ii                  |
|`U+0B09`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B09; U                   |
|`U+0B0A`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B0A; Uu                  |
|`U+0B0B`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B0B; Vocalic R           |
|`U+0B0C`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B0C; Vocalic L           |
|`U+0B0D`   | _unassigned_     |                   |                            |                              |
|`U+0B0E`   | _unassigned_     |                   |                            |                              |
|`U+0B0F`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B0F; E                   |
| | | | |
|`U+0B10`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B10; Ai                  |
|`U+0B11`   | _unassigned_     |                   |                            |                              |
|`U+0B12`   | _unassigned_     |                   |                            |                              |
|`U+0B13`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B13; O                   |
|`U+0B14`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B14; Au                  |
|`U+0B15`   | Letter           | CONSONANT         | _null_                     | &#x0B15; Ka                  |
|`U+0B16`   | Letter           | CONSONANT         | _null_                     | &#x0B16; Kha                 |
|`U+0B17`   | Letter           | CONSONANT         | _null_                     | &#x0B17; Ga                  |
|`U+0B18`   | Letter           | CONSONANT         | _null_                     | &#x0B18; Gha                 |
|`U+0B19`   | Letter           | CONSONANT         | _null_                     | &#x0B19; Nga                 |
|`U+0B1A`   | Letter           | CONSONANT         | _null_                     | &#x0B1A; Ca                  |
|`U+0B1B`   | Letter           | CONSONANT         | _null_                     | &#x0B1B; Cha                 |
|`U+0B1C`   | Letter           | CONSONANT         | _null_                     | &#x0B1C; Ja                  |
|`U+0B1D`   | Letter           | CONSONANT         | _null_                     | &#x0B1D; Jha                 |
|`U+0B1E`   | Letter           | CONSONANT         | _null_                     | &#x0B1E; Nya                 |
|`U+0B1F`   | Letter           | CONSONANT         | _null_                     | &#x0B1F; Tta                 |
| | | | |
|`U+0B20`   | Letter           | CONSONANT         | _null_                     | &#x0B20; Ttha                |
|`U+0B21`   | Letter           | CONSONANT         | _null_                     | &#x0B21; Dda                 |
|`U+0B22`   | Letter           | CONSONANT         | _null_                     | &#x0B22; Ddha                |
|`U+0B23`   | Letter           | CONSONANT         | _null_                     | &#x0B23; Nna                 |
|`U+0B24`   | Letter           | CONSONANT         | _null_                     | &#x0B24; Ta                  |
|`U+0B25`   | Letter           | CONSONANT         | _null_                     | &#x0B25; Tha                 |
|`U+0B26`   | Letter           | CONSONANT         | _null_                     | &#x0B26; Da                  |
|`U+0B27`   | Letter           | CONSONANT         | _null_                     | &#x0B27; Dha                 |
|`U+0B28`   | Letter           | CONSONANT         | _null_                     | &#x0B28; Na                  |
|`U+0B29`   | _unassigned_     |                   |                            |                              |
|`U+0B2A`   | Letter           | CONSONANT         | _null_                     | &#x0B2A; Pa                  |
|`U+0B2B`   | Letter           | CONSONANT         | _null_                     | &#x0B2B; Pha                 |
|`U+0B2C`   | Letter           | CONSONANT         | _null_                     | &#x0B2C; Ba                  |
|`U+0B2D`   | Letter           | CONSONANT         | _null_                     | &#x0B2D; Bha                 |
|`U+0B2E`   | Letter           | CONSONANT         | _null_                     | &#x0B2E; Ma                  |
|`U+0B2F`   | Letter           | CONSONANT         | _null_                     | &#x0B2F; Ya                  |
| | | | |
|`U+0B30`   | Letter           | CONSONANT         | _null_                     | &#x0B30; Ra                  |
|`U+0B31`   | _unassigned_     |                   |                            |                              |
|`U+0B32`   | Letter           | CONSONANT         | _null_                     | &#x0B32; La                  |
|`U+0B33`   | Letter           | CONSONANT         | _null_                     | &#x0B33; Lla                 |
|`U+0B34`   | _unassigned_     |                   |                            |                              |
|`U+0B35`   | Letter           | CONSONANT         | _null_                     | &#x0B35; Va                  |
|`U+0B36`   | Letter           | CONSONANT         | _null_                     | &#x0B36; Sha                 |
|`U+0B37`   | Letter           | CONSONANT         | _null_                     | &#x0B37; Ssa                 |
|`U+0B38`   | Letter           | CONSONANT         | _null_                     | &#x0B38; Sa                  |
|`U+0B39`   | Letter           | CONSONANT         | _null_                     | &#x0B39; Ha                  |
|`U+0B3A`   | _unassigned_     |                   |                            |                              |
|`U+0B3B`   | _unassigned_     |                   |                            |                              |
|`U+0B3C`   | Mark [Mn]        | NUKTA             | BOTTOM_POSITION            | &#x0B3C; Nukta               |
|`U+0B3D`   | Letter           | AVAGRAHA          | _null_                     | &#x0B3D; Avagraha            |
|`U+0B3E`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0B3E; Sign Aa             |
|`U+0B3F`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0B3F; Sign I              |
| | | | |
|`U+0B40`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0B40; Sign Ii             |
|`U+0B41`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0B41; Sign U              |
|`U+0B42`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0B42; Sign Uu             |
|`U+0B43`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0B43; Sign Vocalic R      |
|`U+0B44`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0B44; Sign Vocalic Rr     |
|`U+0B45`   | _unassigned_     |                   |                            |                              |
|`U+0B46`   | _unassigned_     |                   |                            |                              |
|`U+0B47`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x0B47; Sign E              |
|`U+0B48`   | Mark [Mc]        | VOWEL_DEPENDENT   | TOP_AND_LEFT_POSITION      | &#x0B48; Sign Ai             |
|`U+0B49`   | _unassigned_     |                   |                            |                              |
|`U+0B4A`   | _unassigned_     |                   |                            |                              |
|`U+0B4B`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_AND_RIGHT_POSITION    | &#x0B4B; Sign O              |
|`U+0B4C`   | Mark [Mc]        | VOWEL_DEPENDENT   | TOP_LEFT_AND_RIGHT_POSITION| &#x0B4C; Sign Au             |
|`U+0B4D`   | Mark [Mn]        | VIRAMA            | BOTTOM_POSITION            | &#x0B4D; Virama              |
|`U+0B4E`   | _unassigned_     |                   |                            |                              |
|`U+0B4F`   | _unassigned_     |                   |                            |                              |
| | | | |
|`U+0B50`   | _unassigned_     |                   |                            |                              |
|`U+0B51`   | _unassigned_     |                   |                            |                              |
|`U+0B52`   | _unassigned_     |                   |                            |                              |
|`U+0B53`   | _unassigned_     |                   |                            |                              |
|`U+0B54`   | _unassigned_     |                   |                            |                              |
|`U+0B55`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0B55; Sign Overline       |
|`U+0B56`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0B56; Ai Length Mark      |
|`U+0B57`   | Mark [Mc]        | VOWEL_DEPENDENT   | TOP_AND_RIGHT_POSITION     | &#x0B57; Au Length Mark      |
|`U+0B58`   | _unassigned_     |                   |                            |                              |
|`U+0B59`   | _unassigned_     |                   |                            |                              |
|`U+0B5A`   | _unassigned_     |                   |                            |                              |
|`U+0B5B`   | _unassigned_     |                   |                            |                              |
|`U+0B5C`   | Letter           | CONSONANT         | _null_                     | &#x0B5C; Rra                 |
|`U+0B5D`   | Letter           | CONSONANT         | _null_                     | &#x0B5D; Rha                 |
|`U+0B5E`   | _unassigned_     |                   |                            |                              |
|`U+0B5F`   | Letter           | CONSONANT         | _null_                     | &#x0B5F; Yya                 |
| | | | |
|`U+0B60`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B60; Vocalic Rr          |
|`U+0B61`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B61; Vocalic Ll          |
|`U+0B62`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0B62; Sign Vocalic L      |
|`U+0B63`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0B63; Sign Vocalic Ll     |
|`U+0B64`   | _unassigned_     |                   |                            |                              |
|`U+0B65`   | _unassigned_     |                   |                            |                              |
|`U+0B66`   | Number           | NUMBER            | _null_                     | &#x0B66; Digit Zero          |
|`U+0B67`   | Number           | NUMBER            | _null_                     | &#x0B67; Digit One           |
|`U+0B68`   | Number           | NUMBER            | _null_                     | &#x0B68; Digit Two           |
|`U+0B69`   | Number           | NUMBER            | _null_                     | &#x0B69; Digit Three         |
|`U+0B6A`   | Number           | NUMBER            | _null_                     | &#x0B6A; Digit Four          |
|`U+0B6B`   | Number           | NUMBER            | _null_                     | &#x0B6B; Digit Five          |
|`U+0B6C`   | Number           | NUMBER            | _null_                     | &#x0B6C; Digit Six           |
|`U+0B6D`   | Number           | NUMBER            | _null_                     | &#x0B6D; Digit Seven         |
|`U+0B6E`   | Number           | NUMBER            | _null_                     | &#x0B6E; Digit Eight         |
|`U+0B6F`   | Number           | NUMBER            | _null_                     | &#x0B6F; Digit Nine          |
| | | | |
|`U+0B70`   | Symbol           | SYMBOL            | _null_                     | &#x0B70; Isshar              |
|`U+0B71`   | Letter           | CONSONANT         | _null_                     | &#x0B71; Wa                  |
|`U+0B72`   | Number           | NUMBER            | _null_                     | &#x0B72; Fraction 1/4        |
|`U+0B73`   | Number           | NUMBER            | _null_                     | &#x0B73; Fraction 1/2        |
|`U+0B74`   | Number           | NUMBER            | _null_                     | &#x0B74; Fraction 3/4        |
|`U+0B75`   | Number           | NUMBER            | _null_                     | &#x0B75; Fraction 1/16       |
|`U+0B76`   | Number           | NUMBER            | _null_                     | &#x0B76; Fraction 1/8        |
|`U+0B77`   | Number           | NUMBER            | _null_                     | &#x0B77; Fraction 3/16       |
|`U+0B78`   | _unassigned_     |                   |                            |                              |
|`U+0B79`   | _unassigned_     |                   |                            |                              |
|`U+0B7A`   | _unassigned_     |                   |                            |                              |
|`U+0B7B`   | _unassigned_     |                   |                            |                              |
|`U+0B7C`   | _unassigned_     |                   |                            |                              |
|`U+0B7D`   | _unassigned_     |                   |                            |                              |
|`U+0B7E`   | _unassigned_     |                   |                            |                              |
|`U+0B7F`   | _unassigned_     |                   |                            |                              |
:::


## Vedic Extensions character table ##

Sanskrit runs written in the Oriya script may also include
characters from the Vedic Extensions block. These characters should be
classified as follows.

> Note: See the [Vedic Extensions](../opentype-shaping-vedic-extensions.md) 
> document for additional information.


:::{table} Vedic Extensions character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1CD0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD0; Tone Karshana       |
|`U+1CD1`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD1; Tone Shara          |
|`U+1CD2`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD2; Tone Prenkha        |
|`U+1CD3`   | Punctuation      | _null_            | _null_                     | &#x1CD3; Sign Nihshvasa      |
|`U+1CD4`   | Mark [Mn]        | CANTILLATION      | OVERSTRUCK                 | &#x1CD4; Tone Midline Svarita |
|`U+1CD5`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD5; Tone Aggravated Independent Svarita |
|`U+1CD6`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD6; Tone Independent Svarita |
|`U+1CD7`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD7; Tone Kathaka Independent Svarita |
|`U+1CD8`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD8; Tone Candra Below   |
|`U+1CD9`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD9; Tone Kathaka Independent Svarita Schroeder |
|`U+1CDA`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDA; Tone Double Svarita |
|`U+1CDB`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDB; Tone Triple Svarita |
|`U+1CDC`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDC; Tone Kathaka Anudatta |
|`U+1CDD`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDD; Tone Dot Below      |
|`U+1CDE`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDE; Tone Two Dots Below |
|`U+1CDF`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDF; Tone Three Dots Below |
| | | | |																		
|`U+1CE0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CE0; Tone Rigvedic Kashmiri Independent Svarita |
|`U+1CE1`   | Mark [Mc]        | CANTILLATION      | RIGHT_POSITION             | &#x1CE1; Tone Atharvavedic Independent Svarita |
|`U+1CE2`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE2; Sign Visarga Svarita |
|`U+1CE3`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE3; Sign Visarga Udatta |
|`U+1CE4`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE4; Sign Reversed Visarga Udatta |
|`U+1CE5`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE5; Sign Visarga Anudatta |
|`U+1CE6`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE6; Sign Reversed Visarga Anudatta |
|`U+1CE7`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE7; Sign Visarga Udatta With Tail |
|`U+1CE8`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE8; Sign Visarga Anudatta With Tail |
|`U+1CE9`   | Letter           | SYMBOL            | _null_                     | &#x1CE9; Sign Anusvara Antargomukha |
|`U+1CEA`   | Letter           | _null_            | _null_                     | &#x1CEA; Sign Anusvara Bahirgomukha |
|`U+1CEB`   | Letter           | _null_            | _null_                     | &#x1CEB; Sign Anusvara Vamagomukha |
|`U+1CEC`   | Letter           | SYMBOL            | _null_                     | &#x1CEC; Sign Anusvara Vamagomukha With Tail |
|`U+1CED`   | Mark [Mn]        | AVAGRAHA          | BOTTOM_POSITION            | &#x1CED; Sign Tiryak         |
|`U+1CEE`   | Letter           | SYMBOL            | _null_                     | &#x1CEE; Sign Hexiform Long Anusvara |
|`U+1CEF`   | Letter           | _null_            | _null_                     | &#x1CEF; Sign Long Anusvara  |
| | | | |																		
|`U+1CF0`   | Letter           | _null_            | _null_                     | &#x1CF0; Sign Rthang Long Anusvara |
|`U+1CF1`   | Letter           | SYMBOL            | _null_                     | &#x1CF1; Sign Anusvara Ubhayato Mukha |
|`U+1CF2`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF2; Sign Ardhavisarga   |
|`U+1CF3`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF3; Sign Rotated Ardhavisarga |
|`U+1CF4`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CF4; Tone Candra Above   |
|`U+1CF5`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF5; Sign Jihvamuliya    |
|`U+1CF6`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF6; Sign Upadhmaniya    |
|`U+1CF7`   | Mark [Mc]        | _null_            | _null_                     | &#x1CF7; Sign Atikrama       |
|`U+1CF8`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF8; Tone Ring Above     |
|`U+1CF9`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF9; Tone Double Ring Above |
|`U+1CFA`   | Letter           | PLACEHOLDER       | _null_                     | &#x1CFA; Sign Double Anusvara Antargomukha |
|`U+1CFB`   | _unassigned_     |                   |                            |                              |
|`U+1CFC`   | _unassigned_     |                   |                            |                              |
|`U+1CFD`   | _unassigned_     |                   |                            |                              |
|`U+1CFE`   | _unassigned_     |                   |                            |                              |
|`U+1CFF`   | _unassigned_     |                   |                            |                              |
:::


## Miscellaneous character table ##

In addition to general punctuation, runs of Oriya text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block. Oriya text can also incorporate the udatta
(`U+0951`) and anudatta (`U+0952`) signs from the Devanagari block.


:::{table} Additional punctuation character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+0951`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x0951; Udatta              |
|`U+0952`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x0952; Anudatta            |
|`U+0964`   | Punctuation      | _null_            | _null_                     | &#x0964; Danda               |
|`U+0965`   | Punctuation      | _null_            | _null_                     | &#x0965; Double Danda        |
:::


Other important characters that may be encountered when shaping runs
of Oriya text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2013; En dash               |
|`U+2014`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
:::


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a "_Consonant_,Halant,_Consonant_" sequence. The
sequence "_Consonant_,Halant,ZWJ,_Consonant_" blocks the formation of
a conjunct between the two consonants. 

Note, however, that the "_Consonant_,Halant" subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead. The
sequence "_Consonant_,Halant,ZWNJ,_Consonant_" should produce the
first consonant in its standard form, followed by an explicit
"Halant".

A secondary usage of the zero-width joiner is to prevent the formation of
"Reph". An initial "Ra,Halant,ZWJ" sequence should not produce a "Reph",
where an initial "Ra,Halant" sequence without the zero-width joiner
otherwise would.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match "NBSP,ZWJ,Halant,_Consonant_", "NBSP,_mark_", or "NBSP,_matra_".


================================================
FILE: character-tables/character-tables-sinhala.md
================================================
# Sinhala character tables #

This document lists the per-character shaping information needed to
[shape Sinhala text](../opentype-shaping-sinhala.md).

**Contents**

  - [Sinhala character table](#sinhala-character-table)
  - [Sinhala Archaic Numbers character table](#sinhala-archaic-numbers-character-table)
  - [Vedic Extensions character table](#vedic-extensions-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)
	  

## Sinhala character table ##

Sinhala glyphs should be classified as in the following
table. Codepoints in the Sinhala block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as currency marks,
punctuation, and other symbols.

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


:::{table} Sinhala character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0D80`   | _unassigned_     |                   |                            |                              |
|`U+0D81`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0D81; Candrabindu         |
|`U+0D82`   | Mark [Mc]        | BINDU             | RIGHT_POSITION             | &#x0D82; Anusvara            |
|`U+0D83`   | Mark [Mc]        | VISARGA           | RIGHT_POSITION             | &#x0D83; Visarga             |
|`U+0D84`   | _unassigned_     |                   |                            |                              |
|`U+0D85`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D85; A                   |
|`U+0D86`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D86; Aa                  |
|`U+0D87`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D87; Ae                  |
|`U+0D88`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D88; Aae                 |
|`U+0D89`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D89; I                   |
|`U+0D8A`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D8A; Ii                  |
|`U+0D8B`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D8B; U                   |
|`U+0D8C`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D8C; Uu                  |
|`U+0D8D`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D8D; Vocalic R           |
|`U+0D8E`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D8E; Vocalic Rr          |
|`U+0D8F`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D8F; Vocalic L           |
| | | | |																		
|`U+0D90`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D90; Vocalic Ll          |
|`U+0D91`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D91; E                   |
|`U+0D92`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D92; Ee                  |
|`U+0D93`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D93; Ai                  |
|`U+0D94`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D94; O                   |
|`U+0D95`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D95; Oo                  |
|`U+0D96`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0D96; Au                  |
|`U+0D97`   | _unassigned_     |                   |                            |                              |
|`U+0D98`   | _unassigned_     |                   |                            |                              |
|`U+0D99`   | _unassigned_     |                   |                            |                              |
|`U+0D9A`   | Letter           | CONSONANT         | _null_                     | &#x0D9A; Ka                  |
|`U+0D9B`   | Letter           | CONSONANT         | _null_                     | &#x0D9B; Kha                 |
|`U+0D9C`   | Letter           | CONSONANT         | _null_                     | &#x0D9C; Ga                  |
|`U+0D9D`   | Letter           | CONSONANT         | _null_                     | &#x0D9D; Gha                 |
|`U+0D9E`   | Letter           | CONSONANT         | _null_                     | &#x0D9E; Nga                 |
|`U+0D9F`   | Letter           | CONSONANT         | _null_                     | &#x0D9F; Nnga                |
| | | | |																		
|`U+0DA0`   | Letter           | CONSONANT         | _null_                     | &#x0DA0; Ca                  |
|`U+0DA1`   | Letter           | CONSONANT         | _null_                     | &#x0DA1; Cha                 |
|`U+0DA2`   | Letter           | CONSONANT         | _null_                     | &#x0DA2; Ja                  |
|`U+0DA3`   | Letter           | CONSONANT         | _null_                     | &#x0DA3; Jha                 |
|`U+0DA4`   | Letter           | CONSONANT         | _null_                     | &#x0DA4; Nya                 |
|`U+0DA5`   | Letter           | CONSONANT         | _null_                     | &#x0DA5; Jnya                |
|`U+0DA6`   | Letter           | CONSONANT         | _null_                     | &#x0DA6; Nyja                |
|`U+0DA7`   | Letter           | CONSONANT         | _null_                     | &#x0DA7; Tta                 |
|`U+0DA8`   | Letter           | CONSONANT         | _null_                     | &#x0DA8; Ttha                |
|`U+0DA9`   | Letter           | CONSONANT         | _null_                     | &#x0DA9; Dda                 |
|`U+0DAA`   | Letter           | CONSONANT         | _null_                     | &#x0DAA; Ddha                |
|`U+0DAB`   | Letter           | CONSONANT         | _null_                     | &#x0DAB; Nna                 |
|`U+0DAC`   | Letter           | CONSONANT         | _null_                     | &#x0DAC; Nndda               |
|`U+0DAD`   | Letter           | CONSONANT         | _null_                     | &#x0DAD; Ta                  |
|`U+0DAE`   | Letter           | CONSONANT         | _null_                     | &#x0DAE; Tha                 |
|`U+0DAF`   | Letter           | CONSONANT         | _null_                     | &#x0DAF; Da                  |
| | | | |																		
|`U+0DB0`   | Letter           | CONSONANT         | _null_                     | &#x0DB0; Dha                 |
|`U+0DB1`   | Letter           | CONSONANT         | _null_                     | &#x0DB1; Na                  |
|`U+0DB2`   | _unassigned_     |                   |                            |                              |
|`U+0DB3`   | Letter           | CONSONANT         | _null_                     | &#x0DB3; Nda                 |
|`U+0DB4`   | Letter           | CONSONANT         | _null_                     | &#x0DB4; Pa                  |
|`U+0DB5`   | Letter           | CONSONANT         | _null_                     | &#x0DB5; Pha                 |
|`U+0DB6`   | Letter           | CONSONANT         | _null_                     | &#x0DB6; Ba                  |
|`U+0DB7`   | Letter           | CONSONANT         | _null_                     | &#x0DB7; Bha                 |
|`U+0DB8`   | Letter           | CONSONANT         | _null_                     | &#x0DB8; Ma                  |
|`U+0DB9`   | Letter           | CONSONANT         | _null_                     | &#x0DB9; Mba                 |
|`U+0DBA`   | Letter           | CONSONANT         | _null_                     | &#x0DBA; Ya                  |
|`U+0DBB`   | Letter           | CONSONANT         | _null_                     | &#x0DBB; Ra                  |
|`U+0DBC`   | _unassigned_     |                   |                            |                              |
|`U+0DBD`   | Letter           | CONSONANT         | _null_                     | &#x0DBD; La                  |
|`U+0DBE`   | _unassigned_     |                   |                            |                              |
|`U+0DBF`   | _unassigned_     |                   |                            |                              |
| | | | |																		
|`U+0DC0`   | Letter           | CONSONANT         | _null_                     | &#x0DC0; Va                  |
|`U+0DC1`   | Letter           | CONSONANT         | _null_                     | &#x0DC1; Sha                 |
|`U+0DC2`   | Letter           | CONSONANT         | _null_                     | &#x0DC2; Ssa                 |
|`U+0DC3`   | Letter           | CONSONANT         | _null_                     | &#x0DC3; Sa                  |
|`U+0DC4`   | Letter           | CONSONANT         | _null_                     | &#x0DC4; Ha                  |
|`U+0DC5`   | Letter           | CONSONANT         | _null_                     | &#x0DC5; Lla                 |
|`U+0DC6`   | Letter           | CONSONANT         | _null_                     | &#x0DC6; Fa                  |
|`U+0DC7`   | _unassigned_     |                   |                            |                              |
|`U+0DC8`   | _unassigned_     |                   |                            |                              |
|`U+0DC9`   | _unassigned_     |                   |                            |                              |
|`U+0DCA`   | Mark [Mn]        | VIRAMA            | TOP_POSITION               | &#x0DCA; Virama              |
|`U+0DCB`   | _unassigned_     |                   |                            |                              |
|`U+0DCC`   | _unassigned_     |                   |                            |                              |
|`U+0DCD`   | _unassigned_     |                   |                            |                              |
|`U+0DCE`   | _unassigned_     |                   |                            |                              |
|`U+0DCF`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0DCF; Sign Aa             |
| | | | |																	 	
|`U+0DD0`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0DD0; Sign Ae             |
|`U+0DD1`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0DD1; Sign Aae            |
|`U+0DD2`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0DD2; Sign I              |
|`U+0DD3`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0DD3; Sign Ii             |
|`U+0DD4`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0DD4; Sign U              |
|`U+0DD5`   | _unassigned_     |                   |                            |                              |
|`U+0DD6`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0DD6; Sign Uu             |
|`U+0DD7`   | _unassigned_     |                   |                            |                              |
|`U+0DD8`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0DD8; Sign Vocalic R      |
|`U+0DD9`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x0DD9; Sign E              |
|`U+0DDA`   | Mark [Mc]        | VOWEL_DEPENDENT   | TOP_AND_LEFT_POSITION      | &#x0DDA; Sign Ee             |
|`U+0DDB`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x0DDB; Sign Ai             |
|`U+0DDC`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_AND_RIGHT_POSITION    | &#x0DDC; Sign O              |
|`U+0DDD`   | Mark [Mc]        | VOWEL_DEPENDENT   | TOP_LEFT_AND_RIGHT_POSITION| &#x0DDD; Sign Oo             |
|`U+0DDE`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_AND_RIGHT_POSITION    | &#x0DDE; Sign Au             |
|`U+0DDF`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0DDF; Sign Vocalic L      |
| | | | |																		
|`U+0DE0`   | _unassigned_     |                   |                            |                              |
|`U+0DE1`   | _unassigned_     |                   |                            |                              |
|`U+0DE2`   | _unassigned_     |                   |                            |                              |
|`U+0DE3`   | _unassigned_     |                   |                            |                              |
|`U+0DE4`   | _unassigned_     |                   |                            |                              |
|`U+0DE5`   | _unassigned_     |                   |                            |                              |
|`U+0DE6`   | Number           | NUMBER            | _null_                     | &#x0DE6; Digit Zero          |
|`U+0DE7`   | Number           | NUMBER            | _null_                     | &#x0DE7; Digit One           |
|`U+0DE8`   | Number           | NUMBER            | _null_                     | &#x0DE8; Digit Two           |
|`U+0DE9`   | Number           | NUMBER            | _null_                     | &#x0DE9; Digit Three         |
|`U+0DEA`   | Number           | NUMBER            | _null_                     | &#x0DEA; Digit Four          |
|`U+0DEB`   | Number           | NUMBER            | _null_                     | &#x0DEB; Digit Five          |
|`U+0DEC`   | Number           | NUMBER            | _null_                     | &#x0DEC; Digit Six           |
|`U+0DED`   | Number           | NUMBER            | _null_                     | &#x0DED; Digit Seven         |
|`U+0DEE`   | Number           | NUMBER            | _null_                     | &#x0DEE; Digit Eight         |
|`U+0DEF`   | Number           | NUMBER            | _null_                     | &#x0DEF; Digit Nine          |
| | | | |																		
|`U+0DF0`   | _unassigned_     |                   |                            |                              |
|`U+0DF1`   | _unassigned_     |                   |                            |                              |
|`U+0DF2`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0DF2; Sign Vocalic Rr     |
|`U+0DF3`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0DF3; Sign Vocalic Ll     |
|`U+0DF4`   | Punctuation      | _null_            | _null_                     | &#x0DF4; Kunddaliya          |
|`U+0DF5`   | _unassigned_     |                   |                            |                              |
|`U+0DF6`   | _unassigned_     |                   |                            |                              |
|`U+0DF7`   | _unassigned_     |                   |                            |                              |
|`U+0DF8`   | _unassigned_     |                   |                            |                              |
|`U+0DF9`   | _unassigned_     |                   |                            |                              |
|`U+0DFA`   | _unassigned_     |                   |                            |                              |
|`U+0DFB`   | _unassigned_     |                   |                            |                              |
|`U+0DFC`   | _unassigned_     |                   |                            |                              |
|`U+0DFD`   | _unassigned_     |                   |                            |                              |
|`U+0DFE`   | _unassigned_     |                   |                            |                              |
|`U+0DFF`   | _unassigned_     |                   |                            |                              |
:::


## Sinhala Archaic Numbers character table ##

Sinhala text runs may also include glyphs from the Sinhala Archaic
Numbers block. These characters should be classified as follows.


:::{table} Sinhala Archaic Numbers character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+111E0`  | _unassigned_     |                   |                            |                              |
|`U+111E1`  | Number           | NUMBER            | _null_                     | &#x111E1; Archaic Digit One  |
|`U+111E2`  | Number           | NUMBER            | _null_                     | &#x111E2; Archaic Digit Two  |
|`U+111E3`  | Number           | NUMBER            | _null_                     | &#x111E3; Archaic Digit Three|
|`U+111E4`  | Number           | NUMBER            | _null_                     | &#x111E4; Archaic Digit Four |
|`U+111E5`  | Number           | NUMBER            | _null_                     | &#x111E5; Archaic Digit Five |
|`U+111E6`  | Number           | NUMBER            | _null_                     | &#x111E6; Archaic Digit Six  |
|`U+111E7`  | Number           | NUMBER            | _null_                     | &#x111E7; Archaic Digit Seven|
|`U+111E8`  | Number           | NUMBER            | _null_                     | &#x111E8; Archaic Digit Eight|
|`U+111E9`  | Number           | NUMBER            | _null_                     | &#x111E9; Archaic Digit Nine |
|`U+111EA`  | Number           | NUMBER            | _null_                     | &#x111EA; Archaic Number Ten |
|`U+111EB`  | Number           | NUMBER            | _null_                     | &#x111EB; Archaic Number 20  |
|`U+111EC`  | Number           | NUMBER            | _null_                     | &#x111EC; Archaic Number 30  |
|`U+111ED`  | Number           | NUMBER            | _null_                     | &#x111ED; Archaic Number 40  |
|`U+111EE`  | Number           | NUMBER            | _null_                     | &#x111EE; Archaic Number 50  |
|`U+111EF`  | Number           | NUMBER            | _null_                     | &#x111EF; Archaic Number 60  |
| | | | |																		
|`U+111F0`  | Number           | NUMBER            | _null_                     | &#x111F0; Archaic Number 70  |
|`U+111F1`  | Number           | NUMBER            | _null_                     | &#x111F1; Archaic Number 80  |
|`U+111F2`  | Number           | NUMBER            | _null_                     | &#x111F2; Archaic Number 90  |
|`U+111F3`  | Number           | NUMBER            | _null_                     | &#x111F3; Archaic Number 100 |
|`U+111F4`  | Number           | NUMBER            | _null_                     | &#x111F4; Archaic Number 1000|
|`U+111F5`  | _unassigned_     |                   |                            |                              |
|`U+111F6`  | _unassigned_     |                   |                            |                              |
|`U+111F7`  | _unassigned_     |                   |                            |                              |
|`U+111F8`  | _unassigned_     |                   |                            |                              |
|`U+111F9`  | _unassigned_     |                   |                            |                              |
|`U+111FA`  | _unassigned_     |                   |                            |                              |
|`U+111FB`  | _unassigned_     |                   |                            |                              |
|`U+111FC`  | _unassigned_     |                   |                            |                              |
|`U+111FD`  | _unassigned_     |                   |                            |                              |
|`U+111FE`  | _unassigned_     |                   |                            |                              |
|`U+111FF`  | _unassigned_     |                   |                            |                              |
:::


## Vedic Extensions character table ##

Sanskrit runs written in the Sinhala script may also include
characters from the Vedic Extensions block. These characters should be
classified as follows.

> Note: See the [Vedic Extensions](../opentype-shaping-vedic-extensions.md) 
> document for additional information.


:::{table} Vedic Extensions character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1CD0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD0; Tone Karshana       |
|`U+1CD1`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD1; Tone Shara          |
|`U+1CD2`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD2; Tone Prenkha        |
|`U+1CD3`   | Punctuation      | _null_            | _null_                     | &#x1CD3; Sign Nihshvasa      |
|`U+1CD4`   | Mark [Mn]        | CANTILLATION      | OVERSTRUCK                 | &#x1CD4; Tone Midline Svarita |
|`U+1CD5`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD5; Tone Aggravated Independent Svarita |
|`U+1CD6`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD6; Tone Independent Svarita |
|`U+1CD7`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD7; Tone Kathaka Independent Svarita |
|`U+1CD8`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD8; Tone Candra Below   |
|`U+1CD9`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD9; Tone Kathaka Independent Svarita Schroeder |
|`U+1CDA`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDA; Tone Double Svarita |
|`U+1CDB`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDB; Tone Triple Svarita |
|`U+1CDC`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDC; Tone Kathaka Anudatta |
|`U+1CDD`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDD; Tone Dot Below      |
|`U+1CDE`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDE; Tone Two Dots Below |
|`U+1CDF`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDF; Tone Three Dots Below |
| | | | |																		
|`U+1CE0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CE0; Tone Rigvedic Kashmiri Independent Svarita |
|`U+1CE1`   | Mark [Mc]        | CANTILLATION      | RIGHT_POSITION             | &#x1CE1; Tone Atharvavedic Independent Svarita |
|`U+1CE2`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE2; Sign Visarga Svarita |
|`U+1CE3`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE3; Sign Visarga Udatta |
|`U+1CE4`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE4; Sign Reversed Visarga Udatta |
|`U+1CE5`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE5; Sign Visarga Anudatta |
|`U+1CE6`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE6; Sign Reversed Visarga Anudatta |
|`U+1CE7`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE7; Sign Visarga Udatta With Tail |
|`U+1CE8`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE8; Sign Visarga Anudatta With Tail |
|`U+1CE9`   | Letter           | SYMBOL            | _null_                     | &#x1CE9; Sign Anusvara Antargomukha |
|`U+1CEA`   | Letter           | _null_            | _null_                     | &#x1CEA; Sign Anusvara Bahirgomukha |
|`U+1CEB`   | Letter           | _null_            | _null_                     | &#x1CEB; Sign Anusvara Vamagomukha |
|`U+1CEC`   | Letter           | SYMBOL            | _null_                     | &#x1CEC; Sign Anusvara Vamagomukha With Tail |
|`U+1CED`   | Mark [Mn]        | AVAGRAHA          | BOTTOM_POSITION            | &#x1CED; Sign Tiryak         |
|`U+1CEE`   | Letter           | SYMBOL            | _null_                     | &#x1CEE; Sign Hexiform Long Anusvara |
|`U+1CEF`   | Letter           | _null_            | _null_                     | &#x1CEF; Sign Long Anusvara  |
| | | | |																		
|`U+1CF0`   | Letter           | _null_            | _null_                     | &#x1CF0; Sign Rthang Long Anusvara |
|`U+1CF1`   | Letter           | SYMBOL            | _null_                     | &#x1CF1; Sign Anusvara Ubhayato Mukha |
|`U+1CF2`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF2; Sign Ardhavisarga   |
|`U+1CF3`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF3; Sign Rotated Ardhavisarga |
|`U+1CF4`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CF4; Tone Candra Above   |
|`U+1CF5`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF5; Sign Jihvamuliya    |
|`U+1CF6`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF6; Sign Upadhmaniya    |
|`U+1CF7`   | Mark [Mc]        | _null_            | _null_                     | &#x1CF7; Sign Atikrama       |
|`U+1CF8`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF8; Tone Ring Above     |
|`U+1CF9`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF9; Tone Double Ring Above |
|`U+1CFA`   | Letter           | PLACEHOLDER       | _null_                     | &#x1CFA; Sign Double Anusvara Antargomukha |
|`U+1CFB`   | _unassigned_     |                   |                            |                              |
|`U+1CFC`   | _unassigned_     |                   |                            |                              |
|`U+1CFD`   | _unassigned_     |                   |                            |                              |
|`U+1CFE`   | _unassigned_     |                   |                            |                              |
|`U+1CFF`   | _unassigned_     |                   |                            |                              |
:::


## Miscellaneous character table ##

Other important characters that may be encountered when shaping runs
of Sinhala text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2013; En dash               |
|`U+2014`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
:::


The zero-width joiner (<abbr>ZWJ</abbr>) is used to request the subjoined form
of a consonant. The sequence "Consonant_1,Halant,ZWJ,Consonant_2" is
used to specify the subjoined form of "Consonant_2".

A secondary usage of the zero-width joiner is to explicitly request
the formation of "Reph". An initial "Ra,Halant,ZWJ" sequence should
produce a "Reph".

The zero-width non-joiner (<abbr>ZWNJ</abbr>) is not used in shaping runs of
Sinhala text. The <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> is referenced below in various regular
expressions and shaping rules, however, because it is used by other
Indic scripts.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match "NBSP,ZWJ,Halant,_Consonant_", "NBSP,_mark_", or "NBSP,_matra_".


================================================
FILE: character-tables/character-tables-syriac.md
================================================
# Syriac character tables #

This document lists the per-character shaping information needed to
[shape Syriac text](../opentype-shaping-syriac.md).

**Contents**

  - [Syriac character table](#syriac-character-table)
  - [Syriac Supplement character table](#syriac-supplement-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)


## Syriac character table ##

Syriac glyphs should be classified as in the following
table. Codepoints in the Syriac block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column.

The _Joining type_ column indicates whether each codepoint is defined
as joining with adjacent characters on the left side, right side, left
and right sides ("DUAL"), or neither side ("NON_JOINING"). Codepoints
designated TRANSPARENT in the _Joining type_ column do not join with
adjacent characters and, in addition, do not affect the joining
behavior of surrounding characters. Non-spacing marks are of type
TRANSPARENT. Codepoints designated JOIN_CAUSING force adjacent
characters to join.

The _Joining group_ column lists the fundamental letter that the
listed codepoint behaves like for joining purposes.

Assigned codepoints with a _null_ in the _Joining group_
column evoke no special behavior from the shaping engine during the
join-computation stage.

The _Mark class_ column indicates the Canonical Combining Class
for the codepoint.  Marks are assigned non-zero combining classes so
that sequences of adjacent marks can be reordered as required by the
orthography. 

For Syriac, a subset of marks in the 220 and 230 classes are also
designated _Modifier Combining Marks_ (<abbr>MCM</abbr>). These are denoted with
_220_MCM_ and _230_MCM_ in the _Mark class_ column. The <abbr title="Modifier Combining Mark">MCM</abbr> marks are
treated differently during the mark-reordering stage.


:::{table} Syriac character table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                                         |
|:----------|:-----------------|:-------------|:---------------------|:-----------|:----------------------------------------------|
|`U+0700`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x0700; End of Paragraph                     |
|`U+0701`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x0701; Supralinear Full Stop                |
|`U+0702`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x0702; Sublinear Full Stop                  |
|`U+0703`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x0703; Supralinear Colon                    |
|`U+0704`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x0704; Sublinear Colon                      |
|`U+0705`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x0705; Horizontal Colon                     |
|`U+0706`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x0706; Colon Skewed Left                    |
|`U+0707`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x0707; Colon Skewed Right                   |
|`U+0708`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x0708; Supralinear Colon Skewed Left        |
|`U+0709`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x0709; Sublinear Colon Skewed Right         |
|`U+070A`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x070A; Contraction                          |
|`U+070B`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x070B; Harklean Obelus                      |
|`U+070C`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x070C; Harklean Metobelus                   |
|`U+070D`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x070D; Harklean Asteriscus                  |
|`U+070E`   | _unassigned_     |              |                      |            |                                               |
|`U+070F`   | Other            | TRANSPARENT  | _null_               | _0_        | &#x070F; Syriac Abbreviation Mark             |
| | | | | |                                                                                                                      
|`U+0710`   | Letter           | RIGHT        | ALAPH                | _0_        | &#x0710; Alaph                                |
|`U+0711`   | Mark [Mn]        | TRANSPARENT  | _null_               | 36         | &#x0711; Superscript Alaph                    |
|`U+0712`   | Letter           | DUAL         | BETH                 | _0_        | &#x0712; Beth                                 |
|`U+0713`   | Letter           | DUAL         | GAMAL                | _0_        | &#x0713; Gamal                                |
|`U+0714`   | Letter           | DUAL         | GAMAL                | _0_        | &#x0714; Gamal Garshuni                       |
|`U+0715`   | Letter           | RIGHT        | DALATH_RISH          | _0_        | &#x0715; Dalath                               |
|`U+0716`   | Letter           | RIGHT        | DALATH_RISH          | _0_        | &#x0716; Dotless Dalath Rish                  |
|`U+0717`   | Letter           | RIGHT        | HE                   | _0_        | &#x0717; He                                   |
|`U+0718`   | Letter           | RIGHT        | SYRIAC_WAW           | _0_        | &#x0718; Waw                                  |
|`U+0719`   | Letter           | RIGHT        | ZAIN                 | _0_        | &#x0719; Zain                                 |
|`U+071A`   | Letter           | DUAL         | HETH                 | _0_        | &#x071A; Heth                                 |
|`U+071B`   | Letter           | DUAL         | TETH                 | _0_        | &#x071B; Teth                                 |
|`U+071C`   | Letter           | DUAL         | TETH                 | _0_        | &#x071C; Teth Garshuni                        |
|`U+071D`   | Letter           | DUAL         | YUDH                 | _0_        | &#x071D; Yudh                                 |
|`U+071E`   | Letter           | RIGHT        | YUDH_HE              | _0_        | &#x071E; Yudh He                              |
|`U+071F`   | Letter           | DUAL         | KAPH                 | _0_        | &#x071F; Kaph                                 |
| | | | | |                                                                                                                      
|`U+0720`   | Letter           | DUAL         | LAMADH               | _0_        | &#x0720; Lamadh                               |
|`U+0721`   | Letter           | DUAL         | MIM                  | _0_        | &#x0721; Mim                                  |
|`U+0722`   | Letter           | DUAL         | NUN                  | _0_        | &#x0722; Nun                                  |
|`U+0723`   | Letter           | DUAL         | SEMKATH              | _0_        | &#x0723; Semkath                              |
|`U+0724`   | Letter           | DUAL         | SEMKATH              | _0_        | &#x0724; Final Semkath                        |
|`U+0725`   | Letter           | DUAL         | E                    | _0_        | &#x0725; E                                    |
|`U+0726`   | Letter           | DUAL         | PE                   | _0_        | &#x0726; Pe                                   |
|`U+0727`   | Letter           | DUAL         | REVERSED_PE          | _0_        | &#x0727; Reversed Pe                          |
|`U+0728`   | Letter           | RIGHT        | SADHE                | _0_        | &#x0728; Sadhe                                |
|`U+0729`   | Letter           | DUAL         | QAPH                 | _0_        | &#x0729; Qaph                                 |
|`U+072A`   | Letter           | RIGHT        | DALATH_RISH          | _0_        | &#x072A; Rish                                 |
|`U+072B`   | Letter           | DUAL         | SHIN                 | _0_        | &#x072B; Shin                                 |
|`U+072C`   | Letter           | RIGHT        | TAW                  | _0_        | &#x072C; Taw                                  |
|`U+072D`   | Letter           | DUAL         | BETH                 | _0_        | &#x072D; Persian Bheth                        |
|`U+072E`   | Letter           | DUAL         | GAMAL                | _0_        | &#x072E; Persian Ghamal                       |
|`U+072F`   | Letter           | RIGHT        | DALATH_RISH          | _0_        | &#x072F; Persian Dhalath                      |
| | | | | |                                                                                                                      
|`U+0730`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0730; Pthaha Above                         |
|`U+0731`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x0731; Pthaha Below                         |
|`U+0732`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0732; Pthaha Dotted                        |
|`U+0733`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0733; Zqapha Above                         |
|`U+0734`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x0734; Zqapha Below                         |
|`U+0735`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0735; Zqapha Dotted                        |
|`U+0736`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0736; Rbasa Above                          |
|`U+0737`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x0737; Rbasa Below                          |
|`U+0738`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x0738; Dotted Zlama Horizontal              |
|`U+0739`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x0739; Dotted Zlama Angular                 |
|`U+073A`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x073A; Hbasa Above                          |
|`U+073B`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x073B; Hbasa Below                          |
|`U+073C`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x073C; Hbasa-Esasa Dotted                   |
|`U+073D`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x073D; Esasa Above                          |
|`U+073E`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x073E; Esasa Below                          |
|`U+073F`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x073F; Rwaha                                |
| | | | | |                                                                                                                      
|`U+0740`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0740; Feminine Dot                         |
|`U+0741`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0741; Qushshaya                            |
|`U+0742`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x0742; Rukkakha                             |
|`U+0743`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0743; Two Vertical Dots Above              |
|`U+0744`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x0744; Two Vertical Dots Below              |
|`U+0745`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0745; Three Dots Above                     |
|`U+0746`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x0746; Three Dots Below                     |
|`U+0747`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0747; Oblique Line Above                   |
|`U+0748`   | Mark [Mn]        | TRANSPARENT  | _null_               | 220        | &#x0748; Oblique Line Below                   |
|`U+0749`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x0749; Music                                |
|`U+074A`   | Mark [Mn]        | TRANSPARENT  | _null_               | 230        | &#x074A; Barrekh                              |
|`U+074B`   | _unassigned_     |              |                      |            |                                               |
|`U+074C`   | _unassigned_     |              |                      |            |                                               |
|`U+074D`   | Letter           | RIGHT        | ZHAIN                | _0_        | &#x074D; Sogdian Zhain                        |
|`U+074E`   | Letter           | DUAL         | KHAPH                | _0_        | &#x074E; Sogdian Khaph                        |
|`U+074F`   | Letter           | DUAL         | FE                   | _0_        | &#x074F; Sogdian Fe                           |
:::


## Syriac Supplement character table ##

The Syriac Supplement block includes letters needed to write Suriyani
Malayalam, also known as Garshuni or Syriac Malayalam.

:::{table} Syriac Supplement character table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                                         |
|:----------|:-----------------|:-------------|:---------------------|:-----------|-----------------------------------------------|
|`U+0860`   | Letter           | DUAL         | MALAYALAM_NGA        | _0_        | &#x0860; Malayalam Nga                        |
|`U+0861`   | Letter           | NON_JOINING  | MALAYALAM_JA         | _0_        | &#x0861; Malayalam Ja                         |
|`U+0862`   | Letter           | DUAL         | MALAYALAM_NYA        | _0_        | &#x0862; Malayalam Nya                        |
|`U+0863`   | Letter           | DUAL         | MALAYALAM_TTA        | _0_        | &#x0863; Malayalam Tta                        |
|`U+0864`   | Letter           | DUAL         | MALAYALAM_NNA        | _0_        | &#x0864; Malayalam Nna                        |
|`U+0865`   | Letter           | DUAL         | MALAYALAM_NNNA       | _0_        | &#x0865; Malayalam Nnna                       |
|`U+0866`   | Letter           | NON_JOINING  | MALAYALAM_BHA        | _0_        | &#x0866; Malayalam Bha                        |
|`U+0867`   | Letter           | RIGHT        | MALAYALAM_RA         | _0_        | &#x0867; Malayalam Ra                         |
|`U+0868`   | Letter           | DUAL         | MALAYALAM_LLA        | _0_        | &#x0868; Malayalam Lla                        |
|`U+0869`   | Letter           | RIGHT        | MALAYALAM_LLLA       | _0_        | &#x0869; Malayalam Llla                       |
|`U+086A`   | Letter           | RIGHT        | MALAYALAM_SSA        | _0_        | &#x086A; Malayalam Ssa                        |
|`U+086B`   | _unassigned_     |              |                      |            |                                               |
|`U+086C`   | _unassigned_     |              |                      |            |                                               |
|`U+086D`   | _unassigned_     |              |                      |            |                                               |
|`U+086E`   | _unassigned_     |              |                      |            |                                               |
|`U+086F`   | _unassigned_     |              |                      |            |                                               |
:::


## Miscellaneous character table ##

Other important characters that may be encountered when shaping runs
of Syriac text include the dotted-circle placeholder (`U+25CC`), the
combining grapheme joiner (`U+034F`), the zero-width joiner (`U+200D`)
and zero-width non-joiner (`U+200C`), the left-to-right text marker
(`U+200E`) and right-to-left text marker (`U+200F`), and the no-break
space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
combining mark in isolation. Real-world text may also use
other characters, such as hyphens or dashes, in a similar placeholder
fashion; shaping engines should cope with this situation gracefully.

In addition, Syriac text runs may include the "Tatweel" or kashida
codepoint (`U+0640`) from the Arabic block, because the Syriac block
does not encode a separate kashida character.

:::{table} Miscellaneous character table

| Codepoint | Unicode category | Joining type | Joining group        | Mark class | Glyph                          |
|:----------|:-----------------|:-------------|:---------------------|:-----------|--------------------------------|
|`U+00A0`   | Separator        | NON_JOINING  | _null_               | _0_        | &#x00A0; No-break space        |
|`U+034F`   | Other            | NON_JOINING  | _null_               | _0_        | &#x034F; Combining grapheme joiner |
|`U+0640`   | Letter modifier  | JOIN_CAUSING | _null_               | _0_        | &#x0640; Arabic Tatweel        |
|`U+200C`   | Other            | NON_JOINING  | _null_               | _0_        | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOIN_CAUSING | _null_               | _0_        | &#x200D; Zero-width joiner     |
|`U+200E`   | Other            | NON_JOINING  | _null_               | _0_        | &#x200E; Left-to-Right marker  |
|`U+200F`   | Other            | NON_JOINING  | _null_               | _0_        | &#x200F; Right-to-Left marker  |
|`U+2010`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2013; En dash               |
|`U+2014`   | Punctuation      | NON_JOINING  | _null_               | _0_        | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | NON_JOINING  | _null_               | _0_        | &#x25CC; Dotted circle         |
| | | | | | |
:::


The combining grapheme joiner (<abbr>CGJ</abbr>) is primarily used to alter the
order in which adjacent marks are positioned during the
mark-reordering stage, in order to adhere to the needs of a
non-default language orthography.
<!--- combining grapheme joiner explanation --->

The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to force the usage of the
cursive connecting form of a letter even when the context of the
adjoining letters would not trigger the connecting form. 

For example, to show the initial form of a letter in isolation (such
as for displaying it in a table of forms), the sequence "_Letter_,ZWJ"
would be used. To show the medial form of a letter in isolation, the
sequence "ZWJ,_Letter_,ZWJ" would be used.


<!--- Zero-Width Non Joiner explanation --->

The right-to-left mark (<abbr>RLM</abbr>) and left-to-right mark (<abbr>LRM</abbr>) are used by
the Unicode bidirectionality algorithm (BiDi) to indicate the points
in a text run at which the writing direction changes.


<!--- How shaping is affected by the <abbr title="Left-To-Right">LTR</abbr> and <abbr title="Right-To-Left">RTL</abbr> markers explanation --->


The no-break space is primarily used to display those codepoints that
are defined as non-spacing (such as vowel or diacritical marks and "Hamza") in an
isolated context, as an alternative to displaying them superimposed on
the dotted-circle placeholder.


================================================
FILE: character-tables/character-tables-tamil.md
================================================
# Tamil character tables #

This document lists the per-character shaping information needed to
[shape Tamil text](../opentype-shaping-tamil.md).

**Contents**

  - [Tamil character table](#tamil-character-table)
  - [Tamil Supplement character table](#tamil-supplement-character-table)
  - [Grantha marks character table](#grantha-marks-character-table)
  - [Vedic Extensions character table](#vedic-extensions-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)
	  

## Tamil character table ##

Tamil glyphs should be classified as in the following
table. Codepoints in the Tamil block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as currency marks,
punctuation, and other symbols.

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


:::{table} Tamil character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0B80`   | _unassigned_     |                   |                            |                              |
|`U+0B81`   | _unassigned_     |                   |                            |                              |
|`U+0B82`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0B82; Anusvara            |
|`U+0B83`   | Letter           | MODIFYING_LETTER  | _null_                     | &#x0B83; Visarga             |
|`U+0B84`   | _unassigned_     |                   |                            |                              |
|`U+0B85`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B85; A                   |
|`U+0B86`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B86; Aa                  |
|`U+0B87`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B87; I                   |
|`U+0B88`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B88; Ii                  |
|`U+0B89`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B89; U                   |
|`U+0B8A`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B8A; Uu                  |
|`U+0B8B`   | _unassigned_     |                   |                            |                              |
|`U+0B8C`   | _unassigned_     |                   |                            |                              |
|`U+0B8D`   | _unassigned_     |                   |                            |                              |
|`U+0B8E`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B8E; E                   |
|`U+0B8F`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B8F; Ee                  |
| | | | |																		
|`U+0B90`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B90; Ai                  |
|`U+0B91`   | _unassigned_     |                   |                            |                              |
|`U+0B92`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B92; O                   |
|`U+0B93`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B93; Oo                  |
|`U+0B94`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0B94; Au                  |
|`U+0B95`   | Letter           | CONSONANT         | _null_                     | &#x0B95; Ka                  |
|`U+0B96`   | _unassigned_     |                   |                            |                              |
|`U+0B97`   | _unassigned_     |                   |                            |                              |
|`U+0B98`   | _unassigned_     |                   |                            |                              |
|`U+0B99`   | Letter           | CONSONANT         | _null_                     | &#x0B99; Nga                 |
|`U+0B9A`   | Letter           | CONSONANT         | _null_                     | &#x0B9A; Ca                  |
|`U+0B9B`   | _unassigned_     |                   |                            |                              |
|`U+0B9C`   | Letter           | CONSONANT         | _null_                     | &#x0B9C; Ja                  |
|`U+0B9D`   | _unassigned_     |                   |                            |                              |
|`U+0B9E`   | Letter           | CONSONANT         | _null_                     | &#x0B9E; Nya                 |
|`U+0B9F`   | Letter           | CONSONANT         | _null_                     | &#x0B9F; Tta                 |
| | | | |																		
|`U+0BA0`   | _unassigned_     |                   |                            |                              |
|`U+0BA1`   | _unassigned_     |                   |                            |                              |
|`U+0BA2`   | _unassigned_     |                   |                            |                              |
|`U+0BA3`   | Letter           | CONSONANT         | _null_                     | &#x0BA3; Nna                 |
|`U+0BA4`   | Letter           | CONSONANT         | _null_                     | &#x0BA4; Ta                  |
|`U+0BA5`   | _unassigned_     |                   |                            |                              |
|`U+0BA6`   | _unassigned_     |                   |                            |                              |
|`U+0BA7`   | _unassigned_     |                   |                            |                              |
|`U+0BA8`   | Letter           | CONSONANT         | _null_                     | &#x0BA8; Na                  |
|`U+0BA9`   | Letter           | CONSONANT         | _null_                     | &#x0BA9; Nnna                |
|`U+0BAA`   | Letter           | CONSONANT         | _null_                     | &#x0BAA; Pa                  |
|`U+0BAB`   | _unassigned_     |                   |                            |                              |
|`U+0BAC`   | _unassigned_     |                   |                            |                              |
|`U+0BAD`   | _unassigned_     |                   |                            |                              |
|`U+0BAE`   | Letter           | CONSONANT         | _null_                     | &#x0BAE; Ma                  |
|`U+0BAF`   | Letter           | CONSONANT         | _null_                     | &#x0BAF; Ya                  |
| | | | |																		
|`U+0BB0`   | Letter           | CONSONANT         | _null_                     | &#x0BB0; Ra                  |
|`U+0BB1`   | Letter           | CONSONANT         | _null_                     | &#x0BB1; Rra                 |
|`U+0BB2`   | Letter           | CONSONANT         | _null_                     | &#x0BB2; La                  |
|`U+0BB3`   | Letter           | CONSONANT         | _null_                     | &#x0BB3; Lla                 |
|`U+0BB4`   | Letter           | CONSONANT         | _null_                     | &#x0BB4; Llla                |
|`U+0BB5`   | Letter           | CONSONANT         | _null_                     | &#x0BB5; Va                  |
|`U+0BB6`   | Letter           | CONSONANT         | _null_                     | &#x0BB6; Sha                 |
|`U+0BB7`   | Letter           | CONSONANT         | _null_                     | &#x0BB7; Ssa                 |
|`U+0BB8`   | Letter           | CONSONANT         | _null_                     | &#x0BB8; Sa                  |
|`U+0BB9`   | Letter           | CONSONANT         | _null_                     | &#x0BB9; Ha                  |
|`U+0BBA`   | _unassigned_     |                   |                            |                              |
|`U+0BBB`   | _unassigned_     |                   |                            |                              |
|`U+0BBC`   | _unassigned_     |                   |                            |                              |
|`U+0BBD`   | _unassigned_     |                   |                            |                              |
|`U+0BBE`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0BBE; Sign Aa             |
|`U+0BBF`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0BBF; Sign I              |
| | | | |																		
|`U+0BC0`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0BC0; Sign Ii             |
|`U+0BC1`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0BC1; Sign U              |
|`U+0BC2`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0BC2; Sign Uu             |
|`U+0BC3`   | _unassigned_     |                   |                            |                              |
|`U+0BC4`   | _unassigned_     |                   |                            |                              |
|`U+0BC5`   | _unassigned_     |                   |                            |                              |
|`U+0BC6`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x0BC6; Sign E              |
|`U+0BC7`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x0BC7; Sign Ee             |
|`U+0BC8`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x0BC8; Sign Ai             |
|`U+0BC9`   | _unassigned_     |                   |                            |                              |
|`U+0BCA`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_AND_RIGHT_POSITION    | &#x0BCA; Sign O              |
|`U+0BCB`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_AND_RIGHT_POSITION    | &#x0BCB; Sign Oo             |
|`U+0BCC`   | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_AND_RIGHT_POSITION    | &#x0BCC; Sign Au             |
|`U+0BCD`   | Mark [Mn]        | VIRAMA            | TOP_POSITION               | &#x0BCD; Virama              |
|`U+0BCE`   | _unassigned_     |                   |                            |                              |
|`U+0BCF`   | _unassigned_     |                   |                            |                              |
| | | | |																		
|`U+0BD0`   | Letter           | _null_            | _null_                     | &#x0BD0; Om                  |
|`U+0BD1`   | _unassigned_     |                   |                            |                              |
|`U+0BD2`   | _unassigned_     |                   |                            |                              |
|`U+0BD3`   | _unassigned_     |                   |                            |                              |
|`U+0BD4`   | _unassigned_     |                   |                            |                              |
|`U+0BD5`   | _unassigned_     |                   |                            |                              |
|`U+0BD6`   | _unassigned_     |                   |                            |                              |
|`U+0BD7`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0BD7; Au Length Mark      |
|`U+0BD8`   | _unassigned_     |                   |                            |                              |
|`U+0BD9`   | _unassigned_     |                   |                            |                              |
|`U+0BDA`   | _unassigned_     |                   |                            |                              |
|`U+0BDB`   | _unassigned_     |                   |                            |                              |
|`U+0BDC`   | _unassigned_     |                   |                            |                              |
|`U+0BDD`   | _unassigned_     |                   |                            |                              |
|`U+0BDE`   | _unassigned_     |                   |                            |                              |
|`U+0BDF`   | _unassigned_     |                   |                            |                              |
| | | | |																		
|`U+0BE0`   | _unassigned_     |                   |                            |                              |
|`U+0BE1`   | _unassigned_     |                   |                            |                              |
|`U+0BE2`   | _unassigned_     |                   |                            |                              |
|`U+0BE3`   | _unassigned_     |                   |                            |                              |
|`U+0BE4`   | _unassigned_     |                   |                            |                              |
|`U+0BE5`   | _unassigned_     |                   |                            |                              |
|`U+0BE6`   | Number           | NUMBER            | _null_                     | &#x0BE6; Digit Zero          |
|`U+0BE7`   | Number           | NUMBER            | _null_                     | &#x0BE7; Digit One           |
|`U+0BE8`   | Number           | NUMBER            | _null_                     | &#x0BE8; Digit Two           |
|`U+0BE9`   | Number           | NUMBER            | _null_                     | &#x0BE9; Digit Three         |
|`U+0BEA`   | Number           | NUMBER            | _null_                     | &#x0BEA; Digit Four          |
|`U+0BEB`   | Number           | NUMBER            | _null_                     | &#x0BEB; Digit Five          |
|`U+0BEC`   | Number           | NUMBER            | _null_                     | &#x0BEC; Digit Six           |
|`U+0BED`   | Number           | NUMBER            | _null_                     | &#x0BED; Digit Seven         |
|`U+0BEE`   | Number           | NUMBER            | _null_                     | &#x0BEE; Digit Eight         |
|`U+0BEF`   | Number           | NUMBER            | _null_                     | &#x0BEF; Digit Nine          |
| | | | |																		
|`U+0BF0`   | Number           | NUMBER            | _null_                     | &#x0BF0; Number Ten          |
|`U+0BF1`   | Number           | NUMBER            | _null_                     | &#x0BF1; Number One Hundred  |
|`U+0BF2`   | Number           | NUMBER            | _null_                     | &#x0BF2; Number One Thousand |
|`U+0BF3`   | Symbol           | SYMBOL            | _null_                     | &#x0BF3; Day Sign            |
|`U+0BF4`   | Symbol           | SYMBOL            | _null_                     | &#x0BF4; Month Sign          |
|`U+0BF5`   | Symbol           | SYMBOL            | _null_                     | &#x0BF5; Year Sign           |
|`U+0BF6`   | Symbol           | SYMBOL            | _null_                     | &#x0BF6; Debit Sign          |
|`U+0BF7`   | Symbol           | SYMBOL            | _null_                     | &#x0BF7; Credit Sign         |
|`U+0BF8`   | Symbol           | SYMBOL            | _null_                     | &#x0BF8; As Above Sign       |
|`U+0BF9`   | Symbol           | SYMBOL            | _null_                     | &#x0BF9; Tamil Rupee Sign    |
|`U+0BFA`   | Symbol           | SYMBOL            | _null_                     | &#x0BFA; Number Sign         |
|`U+0BFB`   | _unassigned_     |                   |                            |                              |
|`U+0BFC`   | _unassigned_     |                   |                            |                              |
|`U+0BFD`   | _unassigned_     |                   |                            |                              |
|`U+0BFE`   | _unassigned_     |                   |                            |                              |
|`U+0BFF`   | _unassigned_     |                   |                            |                              |
:::


## Tamil Supplement character table ##

Tamil text runs may also include historical symbols and fractions from
the Tamil Supplement block. These characters should be classified as
follows.


:::{table} Tamil Supplement character table

| Codepoint | Unicode category | Shaping class | Mark-placement subclass | Glyph                         |
|:----------|:-----------------|:--------------|:------------------------|:------------------------------|
| `U+11FC0` | Number           | NUMBER        | _null_                  | &#x11FC0; Fraction One Three-Hundred-And-Twentieth |
| `U+11FC1` | Number           | NUMBER        | _null_                  | &#x11FC1; Fraction One One-Hundred-And-Sixtieth |
| `U+11FC2` | Number           | NUMBER        | _null_                  | &#x11FC2; Fraction One Eightieth |
| `U+11FC3` | Number           | NUMBER        | _null_                  | &#x11FC3; Fraction One Sixty-Fourth |
| `U+11FC4` | Number           | NUMBER        | _null_                  | &#x11FC4; Fraction One Fortieth |
| `U+11FC5` | Number           | NUMBER        | _null_                  | &#x11FC5; Fraction One Thirty-Second |
| `U+11FC6` | Number           | NUMBER        | _null_                  | &#x11FC6; Fraction Three Eightieths |
| `U+11FC7` | Number           | NUMBER        | _null_                  | &#x11FC7; Fraction Three Sixty-Fourths |
| `U+11FC8` | Number           | NUMBER        | _null_                  | &#x11FC8; Fraction One Twentieth |
| `U+11FC9` | Number           | NUMBER        | _null_                  | &#x11FC9; Fraction One Sixteenth-1 |
| `U+11FCA` | Number           | NUMBER        | _null_                  | &#x11FCA; Fraction One Sixteenth-2 |
| `U+11FCB` | Number           | NUMBER        | _null_                  | &#x11FCB; Fraction One Tenth  |
| `U+11FCC` | Number           | NUMBER        | _null_                  | &#x11FCC; Fraction One Eighth |
| `U+11FCD` | Number           | NUMBER        | _null_                  | &#x11FCD; Fraction Three Twentieths |
| `U+11FCE` | Number           | NUMBER        | _null_                  | &#x11FCE; Fraction Three Sixteenths |
| `U+11FCF` | Number           | NUMBER        | _null_                  | &#x11FCF; Fraction One Fifth  |
| | | | |																			           
| `U+11FD0` | Number           | NUMBER        | _null_                  | &#x11FD0; Fraction One Quarter |
| `U+11FD1` | Number           | NUMBER        | _null_                  | &#x11FD1; Fraction One Half-1 |
| `U+11FD2` | Number           | NUMBER        | _null_                  | &#x11FD2; Fraction One Half-2 |
| `U+11FD3` | Number           | NUMBER        | _null_                  | &#x11FD3; Fraction Three Quarters |
| `U+11FD4` | Number           | NUMBER        | _null_                  | &#x11FD4; Fraction Downscaling Factor Kiizh |
| `U+11FD5` | Symbol           | SYMBOL        | _null_                  | &#x11FD5; Sign Nel            |
| `U+11FD6` | Symbol           | SYMBOL        | _null_                  | &#x11FD6; Sign Cevitu         |
| `U+11FD7` | Symbol           | SYMBOL        | _null_                  | &#x11FD7; Sign Aazhaakku      |
| `U+11FD8` | Symbol           | SYMBOL        | _null_                  | &#x11FD8; Sign Uzhakku        |
| `U+11FD9` | Symbol           | SYMBOL        | _null_                  | &#x11FD9; Sign Muuvuzhakku    |
| `U+11FDA` | Symbol           | SYMBOL        | _null_                  | &#x11FDA; Sign Kuruni         |
| `U+11FDB` | Symbol           | SYMBOL        | _null_                  | &#x11FDB; Sign Pathakku       |
| `U+11FDC` | Symbol           | SYMBOL        | _null_                  | &#x11FDC; Sign Mukkuruni      |
| `U+11FDD` | Symbol           | SYMBOL        | _null_                  | &#x11FDD; Sign Kaacu          |
| `U+11FDE` | Symbol           | SYMBOL        | _null_                  | &#x11FDE; Sign Panam          |
| `U+11FDF` | Symbol           | SYMBOL        | _null_                  | &#x11FDF; Sign Pon            |
| | | | |																			      
| `U+11FE0` | Symbol           | SYMBOL        | _null_                  | &#x11FE0; Sign Varaakan       |
| `U+11FE1` | Symbol           | SYMBOL        | _null_                  | &#x11FE1; Sign Paaram         |
| `U+11FE2` | Symbol           | SYMBOL        | _null_                  | &#x11FE2; Sign Kuzhi          |
| `U+11FE3` | Symbol           | SYMBOL        | _null_                  | &#x11FE3; Sign Veli           |
| `U+11FE4` | Symbol           | SYMBOL        | _null_                  | &#x11FE4; Wet Cultivation Sign |
| `U+11FE5` | Symbol           | SYMBOL        | _null_                  | &#x11FE5; Dry Cultivation Sign |
| `U+11FE6` | Symbol           | SYMBOL        | _null_                  | &#x11FE6; Land Sign           |
| `U+11FE7` | Symbol           | SYMBOL        | _null_                  | &#x11FE7; Salt Pan Sign       |
| `U+11FE8` | Symbol           | SYMBOL        | _null_                  | &#x11FE8; Traditional Credit Sign |
| `U+11FE9` | Symbol           | SYMBOL        | _null_                  | &#x11FE9; Traditional Number Sign |
| `U+11FEA` | Symbol           | SYMBOL        | _null_                  | &#x11FEA; Current Sign        |
| `U+11FEB` | Symbol           | SYMBOL        | _null_                  | &#x11FEB; And Odd Sign        |
| `U+11FEC` | Symbol           | SYMBOL        | _null_                  | &#x11FEC; Spent Sign          |
| `U+11FED` | Symbol           | SYMBOL        | _null_                  | &#x11FED; Total Sign          |
| `U+11FEE` | Symbol           | SYMBOL        | _null_                  | &#x11FEE; In Possession Sign  |
| `U+11FEF` | Symbol           | SYMBOL        | _null_                  | &#x11FEF; Starting From Sign  |
| | | | |
| `U+11FF0` | Symbol           | SYMBOL        | _null_                  | &#x11FF0; Sign Muthaliya      |
| `U+11FF1` | Symbol           | SYMBOL        | _null_                  | &#x11FF1; Sign Vakaiyaraa     |
| `U+11FF2` | _unassigned_     |               |                         |                               |
| `U+11FF3` | _unassigned_     |               |                         |                               |
| `U+11FF4` | _unassigned_     |               |                         |                               |
| `U+11FF5` | _unassigned_     |               |                         |                               |
| `U+11FF6` | _unassigned_     |               |                         |                               |
| `U+11FF7` | _unassigned_     |               |                         |                               |
| `U+11FF8` | _unassigned_     |               |                         |                               |
| `U+11FF9` | _unassigned_     |               |                         |                               |
| `U+11FFA` | _unassigned_     |               |                         |                               |
| `U+11FFB` | _unassigned_     |               |                         |                               |
| `U+11FFC` | _unassigned_     |               |                         |                               |
| `U+11FFD` | _unassigned_     |               |                         |                               |
| `U+11FFE` | _unassigned_     |               |                         |                               |
| `U+11FFF` | Punctuation      | _null_        | _null_                  | &#x11FFF; End Of Text         |
:::


## Grantha marks character table ##

Tamil text runs may also include diacritical and syllable-modifier
marks from the Grantha block. These characters should be classified as
follows.


:::{table} Grantha marks character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+11301`  | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x11301; Grantha Candrabindu|
|`U+11303`  | Mark [Mc]        | VISARGA           | RIGHT_POSITION             | &#x11303; Grantha Visarga    |
|`U+1133B`  | Mark [Mn]        | NUKTA             | BOTTOM_POSITION            | &#x1133B; Combining Bindu Below |
|`U+1133C`  | Mark [Mn]        | NUKTA             | BOTTOM_POSITION            | &#x1133c; Grantha Nukta      |
:::


## Vedic Extensions character table ##

Sanskrit runs written in the Tamil script may also include
characters from the Vedic Extensions block. These characters should be
classified as follows.

> Note: See the [Vedic Extensions](../opentype-shaping-vedic-extensions.md) 
> document for additional information.


:::{table} Vedic Extensions character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1CD0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD0; Tone Karshana       |
|`U+1CD1`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD1; Tone Shara          |
|`U+1CD2`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD2; Tone Prenkha        |
|`U+1CD3`   | Punctuation      | _null_            | _null_                     | &#x1CD3; Sign Nihshvasa      |
|`U+1CD4`   | Mark [Mn]        | CANTILLATION      | OVERSTRUCK                 | &#x1CD4; Tone Midline Svarita |
|`U+1CD5`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD5; Tone Aggravated Independent Svarita |
|`U+1CD6`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD6; Tone Independent Svarita |
|`U+1CD7`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD7; Tone Kathaka Independent Svarita |
|`U+1CD8`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD8; Tone Candra Below   |
|`U+1CD9`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD9; Tone Kathaka Independent Svarita Schroeder |
|`U+1CDA`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDA; Tone Double Svarita |
|`U+1CDB`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDB; Tone Triple Svarita |
|`U+1CDC`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDC; Tone Kathaka Anudatta |
|`U+1CDD`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDD; Tone Dot Below      |
|`U+1CDE`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDE; Tone Two Dots Below |
|`U+1CDF`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDF; Tone Three Dots Below |
| | | | |																		
|`U+1CE0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CE0; Tone Rigvedic Kashmiri Independent Svarita |
|`U+1CE1`   | Mark [Mc]        | CANTILLATION      | RIGHT_POSITION             | &#x1CE1; Tone Atharvavedic Independent Svarita |
|`U+1CE2`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE2; Sign Visarga Svarita |
|`U+1CE3`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE3; Sign Visarga Udatta |
|`U+1CE4`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE4; Sign Reversed Visarga Udatta |
|`U+1CE5`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE5; Sign Visarga Anudatta |
|`U+1CE6`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE6; Sign Reversed Visarga Anudatta |
|`U+1CE7`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE7; Sign Visarga Udatta With Tail |
|`U+1CE8`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE8; Sign Visarga Anudatta With Tail |
|`U+1CE9`   | Letter           | SYMBOL            | _null_                     | &#x1CE9; Sign Anusvara Antargomukha |
|`U+1CEA`   | Letter           | _null_            | _null_                     | &#x1CEA; Sign Anusvara Bahirgomukha |
|`U+1CEB`   | Letter           | _null_            | _null_                     | &#x1CEB; Sign Anusvara Vamagomukha |
|`U+1CEC`   | Letter           | SYMBOL            | _null_                     | &#x1CEC; Sign Anusvara Vamagomukha With Tail |
|`U+1CED`   | Mark [Mn]        | AVAGRAHA          | BOTTOM_POSITION            | &#x1CED; Sign Tiryak         |
|`U+1CEE`   | Letter           | SYMBOL            | _null_                     | &#x1CEE; Sign Hexiform Long Anusvara |
|`U+1CEF`   | Letter           | _null_            | _null_                     | &#x1CEF; Sign Long Anusvara  |
| | | | |																		
|`U+1CF0`   | Letter           | _null_            | _null_                     | &#x1CF0; Sign Rthang Long Anusvara |
|`U+1CF2`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF2; Sign Ardhavisarga   |
|`U+1CF3`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF3; Sign Rotated Ardhavisarga |
|`U+1CF4`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CF4; Tone Candra Above   |
|`U+1CF5`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF5; Sign Jihvamuliya    |
|`U+1CF6`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF6; Sign Upadhmaniya    |
|`U+1CF7`   | Mark [Mc]        | _null_            | _null_                     | &#x1CF7; Sign Atikrama       |
|`U+1CF8`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF8; Tone Ring Above     |
|`U+1CF9`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF9; Tone Double Ring Above |
|`U+1CFA`   | Letter           | PLACEHOLDER       | _null_                     | &#x1CFA; Sign Double Anusvara Antargomukha |
|`U+1CFB`   | _unassigned_     |                   |                            |                              |
|`U+1CFC`   | _unassigned_     |                   |                            |                              |
|`U+1CFD`   | _unassigned_     |                   |                            |                              |
|`U+1CFE`   | _unassigned_     |                   |                            |                              |
|`U+1CFF`   | _unassigned_     |                   |                            |                              |
:::


## Miscellaneous character table ##

In addition to general punctuation, runs of Tamil text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block. Tamil text can also incorporate the udatta
(`U+0951`) and anudatta (`U+0952`) signs from the Devanagari block.


:::{table} Additional punctuation character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+0951`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x0951; Udatta              |
|`U+0952`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x0952; Anudatta            |
|`U+0964`   | Punctuation      | _null_            | _null_                     | &#x0964; Danda               |
|`U+0965`   | Punctuation      | _null_            | _null_                     | &#x0965; Double Danda        |
:::


Other important characters that may be encountered when shaping runs
of Tamil text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+00B2`   | Number           | SYLLABLE_MODIFIER | TOP_POSITION               | &#x00B2; Superscript Two       |
|`U+00B3`   | Number           | SYLLABLE_MODIFIER | TOP_POSITION               | &#x00B3; Superscript Three     |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2013; En dash               |
|`U+2014`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2014; Em dash               |
|`U+2074`   | Number           | SYLLABLE_MODIFIER | TOP_POSITION               | &#x2074; Superscript Four      |
|`U+2082`   | Number           | SYLLABLE_MODIFIER | TOP_POSITION               | &#x2082; Subscript Two         |
|`U+2083`   | Number           | SYLLABLE_MODIFIER | TOP_POSITION               | &#x2083; Subscript Three       |
|`U+2084`   | Number           | SYLLABLE_MODIFIER | TOP_POSITION               | &#x2084; Subscript Four        |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
:::


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a "_Consonant_,Halant,_Consonant_" sequence. The
sequence "_Consonant_,Halant,ZWJ,_Consonant_" blocks the formation of
a conjunct between the two consonants. 

Note, however, that the "_Consonant_,Halant" subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead. The
sequence "_Consonant_,Halant,ZWNJ,_Consonant_" should produce the
first consonant in its standard form, followed by an explicit
"Halant".

A secondary usage of the zero-width joiner is to prevent the formation of
"Reph". An initial "Ra,Halant,ZWJ" sequence should not produce a "Reph",
where an initial "Ra,Halant" sequence without the zero-width joiner
otherwise would.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match "NBSP,ZWJ,Halant,_Consonant_", "NBSP,_mark_", or "NBSP,_matra_".

Tamil text sometimes uses the Latin numerals 2, 3, and 4 in
superscript or subscript positions to annotate Sanskrit. When used in
this fashion, the superscripts and subscripts are treated as
`SYLLABLE_MODIFIER` signs for shaping purposes.


================================================
FILE: character-tables/character-tables-telugu.md
================================================
# Telugu character tables #

This document lists the per-character shaping information needed to
[shape Telugu text](../opentype-shaping-telugu.md).

**Contents**

  - [Telugu character table](#telugu-character-table)
  - [Vedic Extensions character table](#vedic-extensions-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)
	  

## Telugu character table ##

Telugu glyphs should be classified as in the following
table. Codepoints in the Telugu block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as currency marks,
punctuation, and other symbols.

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process.

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


:::{table} Telugu character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0C00`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0C00; Combining Candrabindu Above |
|`U+0C01`   | Mark [Mc]        | BINDU             | RIGHT_POSITION             | &#x0C01; Candrabindu         |
|`U+0C02`   | Mark [Mc]        | BINDU             | RIGHT_POSITION             | &#x0C02; Anusvara            |
|`U+0C03`   | Mark [Mc]        | VISARGA           | RIGHT_POSITION             | &#x0C03; Visarga             |
|`U+0C04`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0C04; Combining Anusvara Above |
|`U+0C05`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C05; A                   |
|`U+0C06`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C06; Aa                  |
|`U+0C07`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C07; I                   |
|`U+0C08`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C08; Ii                  |
|`U+0C09`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C09; U                   |
|`U+0C0A`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C0A; Uu                  |
|`U+0C0B`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C0B; Vocalic R           |
|`U+0C0C`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C0C; Vocalic L           |
|`U+0C0D`   | _unassigned_     |                   |                            |                              |
|`U+0C0E`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C0E; E                   |
|`U+0C0F`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C0F; Ee                  |
| | | | |																		
|`U+0C10`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C10; Ai                  |
|`U+0C11`   | _unassigned_     |                   |                            |                              |
|`U+0C12`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C12; O                   |
|`U+0C13`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C13; Oo                  |
|`U+0C14`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C14; Au                  |
|`U+0C15`   | Letter           | CONSONANT         | _null_                     | &#x0C15; Ka                  |
|`U+0C16`   | Letter           | CONSONANT         | _null_                     | &#x0C16; Kha                 |
|`U+0C17`   | Letter           | CONSONANT         | _null_                     | &#x0C17; Ga                  |
|`U+0C18`   | Letter           | CONSONANT         | _null_                     | &#x0C18; Gha                 |
|`U+0C19`   | Letter           | CONSONANT         | _null_                     | &#x0C19; Nga                 |
|`U+0C1A`   | Letter           | CONSONANT         | _null_                     | &#x0C1A; Ca                  |
|`U+0C1B`   | Letter           | CONSONANT         | _null_                     | &#x0C1B; Cha                 |
|`U+0C1C`   | Letter           | CONSONANT         | _null_                     | &#x0C1C; Ja                  |
|`U+0C1D`   | Letter           | CONSONANT         | _null_                     | &#x0C1D; Jha                 |
|`U+0C1E`   | Letter           | CONSONANT         | _null_                     | &#x0C1E; Nya                 |
|`U+0C1F`   | Letter           | CONSONANT         | _null_                     | &#x0C1F; Tta                 |
| | | | |																		
|`U+0C20`   | Letter           | CONSONANT         | _null_                     | &#x0C20; Ttha                |
|`U+0C21`   | Letter           | CONSONANT         | _null_                     | &#x0C21; Dda                 |
|`U+0C22`   | Letter           | CONSONANT         | _null_                     | &#x0C22; Ddha                |
|`U+0C23`   | Letter           | CONSONANT         | _null_                     | &#x0C23; Nna                 |
|`U+0C24`   | Letter           | CONSONANT         | _null_                     | &#x0C24; Ta                  |
|`U+0C25`   | Letter           | CONSONANT         | _null_                     | &#x0C25; Tha                 |
|`U+0C26`   | Letter           | CONSONANT         | _null_                     | &#x0C26; Da                  |
|`U+0C27`   | Letter           | CONSONANT         | _null_                     | &#x0C27; Dha                 |
|`U+0C28`   | Letter           | CONSONANT         | _null_                     | &#x0C28; Na                  |
|`U+0C29`   | _unassigned_     |                   |                            |                              |
|`U+0C2A`   | Letter           | CONSONANT         | _null_                     | &#x0C2A; Pa                  |
|`U+0C2B`   | Letter           | CONSONANT         | _null_                     | &#x0C2B; Pha                 |
|`U+0C2C`   | Letter           | CONSONANT         | _null_                     | &#x0C2C; Ba                  |
|`U+0C2D`   | Letter           | CONSONANT         | _null_                     | &#x0C2D; Bha                 |
|`U+0C2E`   | Letter           | CONSONANT         | _null_                     | &#x0C2E; Ma                  |
|`U+0C2F`   | Letter           | CONSONANT         | _null_                     | &#x0C2F; Ya                  |
| | | | |																		
|`U+0C30`   | Letter           | CONSONANT         | _null_                     | &#x0C30; Ra                  |
|`U+0C31`   | Letter           | CONSONANT         | _null_                     | &#x0C31; Rra                 |
|`U+0C32`   | Letter           | CONSONANT         | _null_                     | &#x0C32; La                  |
|`U+0C33`   | Letter           | CONSONANT         | _null_                     | &#x0C33; Lla                 |
|`U+0C34`   | Letter           | CONSONANT         | _null_                     | &#x0C34; Llla                |
|`U+0C35`   | Letter           | CONSONANT         | _null_                     | &#x0C35; Va                  |
|`U+0C36`   | Letter           | CONSONANT         | _null_                     | &#x0C36; Sha                 |
|`U+0C37`   | Letter           | CONSONANT         | _null_                     | &#x0C37; Ssa                 |
|`U+0C38`   | Letter           | CONSONANT         | _null_                     | &#x0C38; Sa                  |
|`U+0C39`   | Letter           | CONSONANT         | _null_                     | &#x0C39; Ha                  |
|`U+0C3A`   | _unassigned_     |                   |                            |                              |
|`U+0C3B`   | _unassigned_     |                   |                            |                              |
|`U+0C3C`   | Mark [Mn]        | NUKTA             | BOTTOM_POSITION            | &#x0C3C; Nukta               |
|`U+0C3D`   | Letter           | AVAGRAHA          | _null_                     | &#x0C3D; Avagraha            |
|`U+0C3E`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0C3E; Sign Aa             |
|`U+0C3F`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0C3F; Sign I              |
| | | | |																		
|`U+0C40`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0C40; Sign Ii             |
|`U+0C41`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0C41; Sign U              |
|`U+0C42`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0C42; Sign Uu             |
|`U+0C43`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0C43; Sign Vocalic R      |
|`U+0C44`   | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0C44; Sign Vocalic Rr     |
|`U+0C45`   | _unassigned_     |                   |                            |                              |
|`U+0C46`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0C46; Sign E              |
|`U+0C47`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0C47; Sign Ee             |
|`U+0C48`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_AND_BOTTOM_POSITION    | &#x0C48; Sign Ai             |
|`U+0C49`   | _unassigned_     |                   |                            |                              |
|`U+0C4A`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0C4A; Sign O              |
|`U+0C4B`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0C4B; Sign Oo             |
|`U+0C4C`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0C4C; Sign Au             |
|`U+0C4D`   | Mark [Mn]        | VIRAMA            | TOP_POSITION               | &#x0C4D; Virama              |
|`U+0C4E`   | _unassigned_     |                   |                            |                              |
|`U+0C4F`   | _unassigned_     |                   |                            |                              |
| | | | |																		
|`U+0C50`   | _unassigned_     |                   |                            |                              |
|`U+0C51`   | _unassigned_     |                   |                            |                              |
|`U+0C52`   | _unassigned_     |                   |                            |                              |
|`U+0C53`   | _unassigned_     |                   |                            |                              |
|`U+0C54`   | _unassigned_     |                   |                            |                              |
|`U+0C55`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0C55; Length Mark         |
|`U+0C56`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0C56; Ai Length Mark      |
|`U+0C57`   | _unassigned_     |                   |                            |                              |
|`U+0C58`   | Letter           | CONSONANT         | _null_                     | &#x0C58; Tsa                 |
|`U+0C59`   | Letter           | CONSONANT         | _null_                     | &#x0C59; Dza                 |
|`U+0C5A`   | Letter           | CONSONANT         | _null_                     | &#x0C5A; Rrra                |
|`U+0C5B`   | _unassigned_     |                   |                            |                              |
|`U+0C5C`   | _unassigned_     |                   |                            |                              |
|`U+0C5D`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x0C5D; Nakaara Pollu       |
|`U+0C5E`   | _unassigned_     |                   |                            |                              |
|`U+0C5F`   | _unassigned_     |                   |                            |                              |
| | | | |																		
|`U+0C60`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C60; Vocalic Rr          |
|`U+0C61`   | Letter           | VOWEL_INDEPENDENT | _null_                     | &#x0C61; Vocalic Ll          |
|`U+0C62`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0C62; Sign Vocalic L      |
|`U+0C63`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0C63; Sign Vocalic Ll     |
|`U+0C64`   | _unassigned_     |                   |                            |                              |
|`U+0C65`   | _unassigned_     |                   |                            |                              |
|`U+0C66`   | Number           | NUMBER            | _null_                     | &#x0C66; Digit Zero          |
|`U+0C67`   | Number           | NUMBER            | _null_                     | &#x0C67; Digit One           |
|`U+0C68`   | Number           | NUMBER            | _null_                     | &#x0C68; Digit Two           |
|`U+0C69`   | Number           | NUMBER            | _null_                     | &#x0C69; Digit Three         |
|`U+0C6A`   | Number           | NUMBER            | _null_                     | &#x0C6A; Digit Four          |
|`U+0C6B`   | Number           | NUMBER            | _null_                     | &#x0C6B; Digit Five          |
|`U+0C6C`   | Number           | NUMBER            | _null_                     | &#x0C6C; Digit Six           |
|`U+0C6D`   | Number           | NUMBER            | _null_                     | &#x0C6D; Digit Seven         |
|`U+0C6E`   | Number           | NUMBER            | _null_                     | &#x0C6E; Digit Eight         |
|`U+0C6F`   | Number           | NUMBER            | _null_                     | &#x0C6F; Digit Nine          |
| | | | |																		
|`U+0C70`   | _unassigned_     |                   |                            |                              |
|`U+0C71`   | _unassigned_     |                   |                            |                              |
|`U+0C72`   | _unassigned_     |                   |                            |                              |
|`U+0C73`   | _unassigned_     |                   |                            |                              |
|`U+0C74`   | _unassigned_     |                   |                            |                              |
|`U+0C75`   | _unassigned_     |                   |                            |                              |
|`U+0C76`   | _unassigned_     |                   |                            |                              |
|`U+0C77`   | Punctuation      | _null_            | _null_                     | &#x0C77; Sign Siddham        |
|`U+0C78`   | Number           | NUMBER            | _null_                     | &#x0C78; Fraction Zero Odd P |
|`U+0C79`   | Number           | NUMBER            | _null_                     | &#x0C79; Fraction One Odd P  |
|`U+0C7A`   | Number           | NUMBER            | _null_                     | &#x0C7A; Fraction Two Odd P  |
|`U+0C7B`   | Number           | NUMBER            | _null_                     | &#x0C7B; Fraction Three Odd P|
|`U+0C7C`   | Number           | NUMBER            | _null_                     | &#x0C7C; Fraction One Even P |
|`U+0C7D`   | Number           | NUMBER            | _null_                     | &#x0C7D; Fraction Two Even P |
|`U+0C7E`   | Number           | NUMBER            | _null_                     | &#x0C7E; Fraction Three Even P|
|`U+0C7F`   | Symbol           | SYMBOL            | _null_                     | &#x0C7F; Tuumu               |
:::


## Vedic Extensions character table ##

Sanskrit runs written in the Telugu script may also include
characters from the Vedic Extensions block. These characters should be
classified as follows.

> Note: See the [Vedic Extensions](../opentype-shaping-vedic-extensions.md) 
> document for additional information.


:::{table} Vedic Extensions character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1CD0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD0; Tone Karshana       |
|`U+1CD1`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD1; Tone Shara          |
|`U+1CD2`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD2; Tone Prenkha        |
|`U+1CD3`   | Punctuation      | _null_            | _null_                     | &#x1CD3; Sign Nihshvasa      |
|`U+1CD4`   | Mark [Mn]        | CANTILLATION      | OVERSTRUCK                 | &#x1CD4; Tone Midline Svarita |
|`U+1CD5`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD5; Tone Aggravated Independent Svarita |
|`U+1CD6`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD6; Tone Independent Svarita |
|`U+1CD7`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD7; Tone Kathaka Independent Svarita |
|`U+1CD8`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD8; Tone Candra Below   |
|`U+1CD9`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD9; Tone Kathaka Independent Svarita Schroeder |
|`U+1CDA`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDA; Tone Double Svarita |
|`U+1CDB`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDB; Tone Triple Svarita |
|`U+1CDC`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDC; Tone Kathaka Anudatta |
|`U+1CDD`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDD; Tone Dot Below      |
|`U+1CDE`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDE; Tone Two Dots Below |
|`U+1CDF`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDF; Tone Three Dots Below |
| | | | |																		
|`U+1CE0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CE0; Tone Rigvedic Kashmiri Independent Svarita |
|`U+1CE1`   | Mark [Mc]        | CANTILLATION      | RIGHT_POSITION             | &#x1CE1; Tone Atharvavedic Independent Svarita |
|`U+1CE2`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE2; Sign Visarga Svarita |
|`U+1CE3`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE3; Sign Visarga Udatta |
|`U+1CE4`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE4; Sign Reversed Visarga Udatta |
|`U+1CE5`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE5; Sign Visarga Anudatta |
|`U+1CE6`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE6; Sign Reversed Visarga Anudatta |
|`U+1CE7`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE7; Sign Visarga Udatta With Tail |
|`U+1CE8`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE8; Sign Visarga Anudatta With Tail |
|`U+1CE9`   | Letter           | SYMBOL            | _null_                     | &#x1CE9; Sign Anusvara Antargomukha |
|`U+1CEA`   | Letter           | _null_            | _null_                     | &#x1CEA; Sign Anusvara Bahirgomukha |
|`U+1CEB`   | Letter           | _null_            | _null_                     | &#x1CEB; Sign Anusvara Vamagomukha |
|`U+1CEC`   | Letter           | SYMBOL            | _null_                     | &#x1CEC; Sign Anusvara Vamagomukha With Tail |
|`U+1CED`   | Mark [Mn]        | AVAGRAHA          | BOTTOM_POSITION            | &#x1CED; Sign Tiryak         |
|`U+1CEE`   | Letter           | SYMBOL            | _null_                     | &#x1CEE; Sign Hexiform Long Anusvara |
|`U+1CEF`   | Letter           | _null_            | _null_                     | &#x1CEF; Sign Long Anusvara  |
| | | | |																		
|`U+1CF0`   | Letter           | _null_            | _null_                     | &#x1CF0; Sign Rthang Long Anusvara |
|`U+1CF1`   | Letter           | SYMBOL            | _null_                     | &#x1CF1; Sign Anusvara Ubhayato Mukha |
|`U+1CF2`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF2; Sign Ardhavisarga   |
|`U+1CF3`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF3; Sign Rotated Ardhavisarga |
|`U+1CF4`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CF4; Tone Candra Above   |
|`U+1CF5`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF5; Sign Jihvamuliya    |
|`U+1CF6`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF6; Sign Upadhmaniya    |
|`U+1CF7`   | Mark [Mc]        | _null_            | _null_                     | &#x1CF7; Sign Atikrama       |
|`U+1CF8`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF8; Tone Ring Above     |
|`U+1CF9`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF9; Tone Double Ring Above |
|`U+1CFA`   | Letter           | PLACEHOLDER       | _null_                     | &#x1CFA; Sign Double Anusvara Antargomukha |
|`U+1CFB`   | _unassigned_     |                   |                            |                              |
|`U+1CFC`   | _unassigned_     |                   |                            |                              |
|`U+1CFD`   | _unassigned_     |                   |                            |                              |
|`U+1CFE`   | _unassigned_     |                   |                            |                              |
|`U+1CFF`   | _unassigned_     |                   |                            |                              |
:::


## Miscellaneous character table ##

In addition to general punctuation, runs of Telugu text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block. Telugu text can also incorporate the udatta
(`U+0951`) and anudatta (`U+0952`) signs from the Devanagari block.


:::{table} Additional punctuation character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+0951`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x0951; Udatta              |
|`U+0952`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x0952; Anudatta            |
|`U+0964`   | Punctuation      | _null_            | _null_                     | &#x0964; Danda               |
|`U+0965`   | Punctuation      | _null_            | _null_                     | &#x0965; Double Danda        |
:::


Other important characters that may be encountered when shaping runs
of Telugu text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2013; En dash               |
|`U+2014`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
:::


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a "_Consonant_,Halant,_Consonant_" sequence. The
sequence "_Consonant_,Halant,ZWJ,_Consonant_" blocks the formation of
a conjunct between the two consonants. 

Note, however, that the "_Consonant_,Halant" subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead. The
sequence "_Consonant_,Halant,ZWNJ,_Consonant_" should produce the
first consonant in its standard form, followed by an explicit
"Halant".

A secondary usage of the zero-width joiner is to prevent the formation of
"Reph". An initial "Ra,Halant,ZWJ" sequence should not produce a "Reph",
where an initial "Ra,Halant" sequence without the zero-width joiner
otherwise would.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match "NBSP,ZWJ,Halant,_Consonant_", "NBSP,_mark_", or "NBSP,_matra_".


================================================
FILE: character-tables/character-tables-thai.md
================================================
# Thai character tables #

This document lists the per-character shaping information needed to
[shape Thai text](../opentype-shaping-thai-lao.md#the-thailao-shaping-model).

**Contents**

  - [Thai character table](#thai-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)


## Thai character table ##

Thai glyphs should be classified as in the following
table. Codepoints in the Thai block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column.

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as currency marks,
punctuation, and other symbols.

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process.

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


:::{table} Thai character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass | Combining class | PUA    | Glyph                         |
|:----------|:-----------------|:------------------|:------------------------|:----------------|:-------|:------------------------------|
|`U+0E00`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E01`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E01; Ko Kai               |
|`U+0E02`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E02; Kho Khai             |
|`U+0E03`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E03; Kho Khuat            |
|`U+0E04`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E04; Kho Khwai            |
|`U+0E05`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E05; Kho Khon             |
|`U+0E06`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E06; Kho Rakhang          |
|`U+0E07`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E07; Ngo Ngu              |
|`U+0E08`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E08; Cho Chan             |
|`U+0E09`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E09; Cho Ching            |
|`U+0E0A`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E0A; Cho Chang            |
|`U+0E0B`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E0B; So So                |
|`U+0E0C`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E0C; Cho Choe             |
|`U+0E0D`   | Letter           | CONSONANT         | _null_                  | _0_             | RC     | &#x0E0D; Yo Ying              |
|`U+0E0E`   | Letter           | CONSONANT         | _null_                  | _0_             | DC     | &#x0E0E; Do Chada             |
|`U+0E0F`   | Letter           | CONSONANT         | _null_                  | _0_             | DC     | &#x0E0F; To Patak             |
| | | | | | | |   
|`U+0E10`   | Letter           | CONSONANT         | _null_                  | _0_             | RC     | &#x0E10; Tho Than             |
|`U+0E11`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E11; Tho Nangmontho       |
|`U+0E12`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E12; Tho Phuthao          |
|`U+0E13`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E13; No Nen               |
|`U+0E14`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E14; Do Dek               |
|`U+0E15`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E15; To Tao               |
|`U+0E16`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E16; Tho Thung            |
|`U+0E17`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E17; Tho Thahan           |
|`U+0E18`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E18; Tho Thong            |
|`U+0E19`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E19; No Nu                |
|`U+0E1A`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E1A; Bo Baimai            |
|`U+0E1B`   | Letter           | CONSONANT         | _null_                  | _0_             | AC     | &#x0E1B; Po Pla               |
|`U+0E1C`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E1C; Pho Phung            |
|`U+0E1D`   | Letter           | CONSONANT         | _null_                  | _0_             | AC     | &#x0E1D; Fo Fa                |
|`U+0E1E`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E1E; Pho Phan             |
|`U+0E1F`   | Letter           | CONSONANT         | _null_                  | _0_             | AC     | &#x0E1F; Fo Fan               |
| | | | | | | |   
|`U+0E20`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E20; Pho Samphao          |
|`U+0E21`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E21; Mo Ma                |
|`U+0E22`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E22; Yo Yak               |
|`U+0E23`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E23; Ro Rua               |
|`U+0E24`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E24; Ru                   |
|`U+0E25`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E25; Lo Ling              |
|`U+0E26`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E26; Lu                   |
|`U+0E27`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E27; Wo Waen              |
|`U+0E28`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E28; So Sala              |
|`U+0E29`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E29; So Rusi              |
|`U+0E2A`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E2A; So Sua               |
|`U+0E2B`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E2B; Ho Hip               |
|`U+0E2C`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E2C; Lo Chula             |
|`U+0E2D`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E2D; O Ang                |
|`U+0E2E`   | Letter           | CONSONANT         | _null_                  | _0_             | NC     | &#x0E2E; Ho Nokhuk            |
|`U+0E2F`   | Letter           | CONSONANT         | _null_                  | _0_             | _null_ | &#x0E2F; Paiyannoi            |
| | | | | | | |
|`U+0E30`   | Letter           | VOWEL_DEPENDENT   | RIGHT_POSITION          | _0_             | CV     | &#x0E30; Sara A               |
|`U+0E31`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION            | _0_             | AV     | &#x0E31; Mai Han-akat         |
|`U+0E32`   | Letter           | VOWEL_DEPENDENT   | RIGHT_POSITION          | _0_             | CV     | &#x0E32; Sara Aa              |
|`U+0E33`   | Letter           | VOWEL_DEPENDENT   | RIGHT_POSITION          | _0_             | _null_ | &#x0E33; Sara Am              |
|`U+0E34`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION            | _0_             | AV     | &#x0E34; Sara I               |
|`U+0E35`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION            | _0_             | AV     | &#x0E35; Sara Ii              |
|`U+0E36`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION            | _0_             | AV     | &#x0E36; Sara Ue              |
|`U+0E37`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION            | _0_             | AV     | &#x0E37; Sara Uee             |
|`U+0E38`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION         | 3               | BV     | &#x0E38; Sara U               |
|`U+0E39`   | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION         | 3               | BV     | &#x0E39; Sara Uu              |
|`U+0E3A`   | Mark [Mn]        | PURE_KILLER       | BOTTOM_POSITION         | 9               | BV     | &#x0E3A; Phinthu              |
|`U+0E3B`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E3C`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E3D`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E3E`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E3F`   | Symbol           | SYMBOL            | _null_                  | _0_             | _null_ | &#x0E3F; Currency symbol Baht |
| | | | | | | |
|`U+0E40`   | Letter           | VOWEL_DEPENDENT   | VISUAL_ORDER_LEFT       | _0_             | CV     | &#x0E40; Sara E               |
|`U+0E41`   | Letter           | VOWEL_DEPENDENT   | VISUAL_ORDER_LEFT       | _0_             | CV     | &#x0E41; Sara Ae              |
|`U+0E42`   | Letter           | VOWEL_DEPENDENT   | VISUAL_ORDER_LEFT       | _0_             | CV     | &#x0E42; Sara O               |
|`U+0E43`   | Letter           | VOWEL_DEPENDENT   | VISUAL_ORDER_LEFT       | _0_             | CV     | &#x0E43; Sara Ai Maimuan      |
|`U+0E44`   | Letter           | VOWEL_DEPENDENT   | VISUAL_ORDER_LEFT       | _0_             | CV     | &#x0E44; Sara Ai Maimalai     |
|`U+0E45`   | Letter           | VOWEL_DEPENDENT   | RIGHT_POSITION          | _0_             | CV     | &#x0E45; Lakkhangyao          |
|`U+0E46`   | Letter Modifier  | _null_            | _null_                  | _0_             | _null_ | &#x0E46; Maiyamok             |
|`U+0E47`   | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION            | _0_             | AV     | &#x0E47; Maitaikhu            |
|`U+0E48`   | Mark [Mn]        | TONE_MARKER       | TOP_POSITION            | 107             | TV     | &#x0E48; Mai Ek               |
|`U+0E49`   | Mark [Mn]        | TONE_MARKER       | TOP_POSITION            | 107             | TV     | &#x0E49; Mai Tho              |
|`U+0E4A`   | Mark [Mn]        | TONE_MARKER       | TOP_POSITION            | 107             | TV     | &#x0E4A; Mai Tri              |
|`U+0E4B`   | Mark [Mn]        | TONE_MARKER       | TOP_POSITION            | 107             | TV     | &#x0E4B; Mai Chattawa         |
|`U+0E4C`   | Mark [Mn]        | CONSONANT_KILLER  | TOP_POSITION            | _0_             | TV     | &#x0E4C; Thanthakhat          |
|`U+0E4D`   | Mark [Mn]        | BINDU             | TOP_POSITION            | _0_             | AV     | &#x0E4D; Nikhahit             |
|`U+0E4E`   | Mark [Mn]        | PURE_KILLER       | TOP_POSITION            | _0_             | AV     | &#x0E4E; Yamakkan             |
|`U+0E4F`   | Punctuation      | _null_            | _null_                  | _0_             | _null_ | &#x0E4F; Fongman              |
| | | | | | | |
|`U+0E50`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0E50; Digit zero           |
|`U+0E51`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0E51; Digit one            |
|`U+0E52`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0E52; Digit two            |
|`U+0E53`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0E53; Digit three          |
|`U+0E54`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0E54; Digit four           |
|`U+0E55`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0E55; Digit five           |
|`U+0E56`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0E56; Digit six            |
|`U+0E57`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0E57; Digit seven          |
|`U+0E58`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0E58; Digit eight          |
|`U+0E59`   | Number           | NUMBER            | _null_                  | _0_             | _null_ | &#x0E59; Digit nine           |
|`U+0E5A`   | Punctuation      | _null_            | _null_                  | _0_             | _null_ | &#x0E5A; Angkhankhu           |
|`U+0E5B`   | Punctuation      | _null_            | _null_                  | _0_             | _null_ | &#x0E5B; Khomut               |
|`U+0E5C`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E5D`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E5E`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E5F`   | _unassigned_     |                   |                         |                 |        |                               |
| | | | | | | |
|`U+0E60`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E61`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E62`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E63`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E64`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E65`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E66`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E67`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E68`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E69`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E6A`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E6B`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E6C`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E6D`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E6E`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E6F`   | _unassigned_     |                   |                         |                 |        |                               |
| | | | | | | |
|`U+0E70`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E71`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E72`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E73`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E74`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E75`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E76`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E77`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E78`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E79`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E7A`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E7B`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E7C`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E7D`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E7E`   | _unassigned_     |                   |                         |                 |        |                               |
|`U+0E7F`   | _unassigned_     |                   |                         |                 |        |                               |
:::


## Miscellaneous character table ##

In addition to general punctuation, runs of Thai text often use the
combining macron below (`U+0331`) and combining tilde (`U+0303`) from the
Combining Diacritical Marks block, as well as the modifier letter
apostrophe (`U+02BC`) and modifier letter minus sign (`U+02D7`) from the
Spacing Modifier Letters block, particularly when used to write minority
languages.

In addition, Thai text typically does not insert spaces between words.
Consequently, the Zero-Width Space (`U+200B`) character is often used to insert
invisible break points that may be converted to line breaks.


:::{table} Additional punctuation character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+02BC`   | Mark [Mn]        | TONE_MARKER       | TOP_POSITION               | &#x02BC; Modifier apostrophe   |
|`U+02D7`   | Mark [Mn]        | TONE_MARKER       | BOTTOM_POSITION            | &#x02D7; Modifier minus sign   |
|`U+0303`   | Mark [Mn]        | TONE_MARKER       | TOP_POSITION               | &#x0303; Combining tilde       |
|`U+0331`   | Mark [Mn]        | TONE_MARKER       | BOTTOM_POSITION            | &#x0331; Combining macron below|
|`U+200B`   | Other            | PLACEHOLDER       | _null_                     | &#x200B; Zero-width space      |
:::


Other important characters that may be encountered when shaping runs
of Thai text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+2010`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2010; Hyphen                |
|`U+2011`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2011; No-break hyphen       |
|`U+2012`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2012; Figure dash           |
|`U+2013`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2013; En dash               |
|`U+2014`   | Punctuation      | PLACEHOLDER       | _null_                     | &#x2014; Em dash               |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
:::


================================================
FILE: character-tables/character-tables-tibetan.md
================================================
# Tibetan character tables #

This document lists the per-character shaping information needed to
[shape Tibetan text](../opentype-shaping-tibetan.md).

**Contents**

  - [Tibetan character table](#tibetan-character-table)
  - [Miscellaneous character table](#miscellaneous-character-table)


## Tibetan character table ##

Tibetan glyphs should be classified as in the following
table. Codepoints in the Tibetan block with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column.

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. Note that
this does include some valid codepoints, such as punctuation marks
and the syllable Om (`U+0F00`).

> Note: the `NUMBER` and `SYMBOL` _Shaping classes_ are important
> during syllable identification, but generally evoke no further
> special behavior during the rest of the shaping process.

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


:::{table} Tibetan character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                                            |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------------------------|
| `U+0F00`  | Letter           | _null_            | _null_                     | &#x0F00; Syllable Om                             |
| `U+0F01`  | Symbol           | SYMBOL            | _null_                     | &#x0F01; Gter Yig Mgo Truncated A                |
| `U+0F02`  | Symbol           | SYMBOL            | _null_                     | &#x0F02; Gter Yig Mgo -Um Rnam Bcad Ma           |
| `U+0F03`  | Symbol           | SYMBOL            | _null_                     | &#x0F03; Gter Yig Mgo -Um Gter Tsheg Ma          |
| `U+0F04`  | Punctuation      | _null_            | _null_                     | &#x0F04; Initial Yig Mgo Mdun Ma                 |
| `U+0F05`  | Punctuation      | _null_            | _null_                     | &#x0F05; Closing Yig Mgo Sgab Ma                 |
| `U+0F06`  | Punctuation      | _null_            | _null_                     | &#x0F06; Caret Yig Mgo Phur Shad Ma              |
| `U+0F07`  | Punctuation      | _null_            | _null_                     | &#x0F07; Yig Mgo Tsheg Shad Ma                   |
| `U+0F08`  | Punctuation      | _null_            | _null_                     | &#x0F08; Sbrul Shad                              |
| `U+0F09`  | Punctuation      | _null_            | _null_                     | &#x0F09; Bskur Yig Mgo                           |
| `U+0F0A`  | Punctuation      | _null_            | _null_                     | &#x0F0A; Bka- Shog Yig Mgo                       |
| `U+0F0B`  | Punctuation      | _null_            | _null_                     | &#x0F0B; Intersyllabic Tsheg                     |
| `U+0F0C`  | Punctuation      | _null_            | _null_                     | &#x0F0C; Delimiter Tsheg Bstar                   |
| `U+0F0D`  | Punctuation      | _null_            | _null_                     | &#x0F0D; Shad                                    |
| `U+0F0E`  | Punctuation      | _null_            | _null_                     | &#x0F0E; Nyis Shad                               |
| `U+0F0F`  | Punctuation      | _null_            | _null_                     | &#x0F0F; Tsheg Shad                              |
| | | | | |
| `U+0F10`  | Punctuation      | _null_            | _null_                     | &#x0F10; Nyis Tsheg Shad                         |
| `U+0F11`  | Punctuation      | _null_            | _null_                     | &#x0F11; Rin Chen Spungs Shad                    |
| `U+0F12`  | Punctuation      | _null_            | _null_                     | &#x0F12; Rgya Gram Shad                          |
| `U+0F13`  | Symbol           | SYMBOL            | _null_                     | &#x0F13; Caret -Dzud Rtags Me Long Can           |
| `U+0F14`  | Punctuation      | _null_            | _null_                     | &#x0F14; Gter Tsheg                              |
| `U+0F15`  | Symbol           | SYMBOL            | _null_                     | &#x0F15; Logotype Sign Chad Rtags                |
| `U+0F16`  | Symbol           | SYMBOL            | _null_                     | &#x0F16; Logotype Sign Lhag Rtags                |
| `U+0F17`  | Symbol           | SYMBOL            | _null_                     | &#x0F17; Astrological Sign Sgra Gcan -Char Rtags |
| `U+0F18`  | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0F18; Astrological Sign -Khyud Pa             |
| `U+0F19`  | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0F19; Astrological Sign Sdong Tshugs          |
| `U+0F1A`  | Symbol           | SYMBOL            | _null_                     | &#x0F1A; Sign Rdel Dkar Gcig                     |
| `U+0F1B`  | Symbol           | SYMBOL            | _null_                     | &#x0F1B; Sign Rdel Dkar Gnyis                    |
| `U+0F1C`  | Symbol           | SYMBOL            | _null_                     | &#x0F1C; Sign Rdel Dkar Gsum                     |
| `U+0F1D`  | Symbol           | SYMBOL            | _null_                     | &#x0F1D; Sign Rdel Nag Gcig                      |
| `U+0F1E`  | Symbol           | SYMBOL            | _null_                     | &#x0F1E; Sign Rdel Nag Gnyis                     |
| `U+0F1F`  | Symbol           | SYMBOL            | _null_                     | &#x0F1F; Sign Rdel Dkar Rdel Nag                 |
| | | | | |
| `U+0F20`  | Number           | NUMBER            | _null_                     | &#x0F20; Digit Zero                              |
| `U+0F21`  | Number           | NUMBER            | _null_                     | &#x0F21; Digit One                               |
| `U+0F22`  | Number           | NUMBER            | _null_                     | &#x0F22; Digit Two                               |
| `U+0F23`  | Number           | NUMBER            | _null_                     | &#x0F23; Digit Three                             |
| `U+0F24`  | Number           | NUMBER            | _null_                     | &#x0F24; Digit Four                              |
| `U+0F25`  | Number           | NUMBER            | _null_                     | &#x0F25; Digit Five                              |
| `U+0F26`  | Number           | NUMBER            | _null_                     | &#x0F26; Digit Six                               |
| `U+0F27`  | Number           | NUMBER            | _null_                     | &#x0F27; Digit Seven                             |
| `U+0F28`  | Number           | NUMBER            | _null_                     | &#x0F28; Digit Eight                             |
| `U+0F29`  | Number           | NUMBER            | _null_                     | &#x0F29; Digit Nine                              |
| `U+0F2A`  | Number           | NUMBER            | _null_                     | &#x0F2A; Digit Half One                          |
| `U+0F2B`  | Number           | NUMBER            | _null_                     | &#x0F2B; Digit Half Two                          |
| `U+0F2C`  | Number           | NUMBER            | _null_                     | &#x0F2C; Digit Half Three                        |
| `U+0F2D`  | Number           | NUMBER            | _null_                     | &#x0F2D; Digit Half Four                         |
| `U+0F2E`  | Number           | NUMBER            | _null_                     | &#x0F2E; Digit Half Five                         |
| `U+0F2F`  | Number           | NUMBER            | _null_                     | &#x0F2F; Digit Half Six                          |
| | | | | |
| `U+0F30`  | Number           | NUMBER            | _null_                     | &#x0F30; Digit Half Seven                        |
| `U+0F31`  | Number           | NUMBER            | _null_                     | &#x0F31; Digit Half Eight                        |
| `U+0F32`  | Number           | NUMBER            | _null_                     | &#x0F32; Digit Half Nine                         |
| `U+0F33`  | Number           | NUMBER            | _null_                     | &#x0F33; Digit Half Zero                         |
| `U+0F34`  | Symbol           | SYMBOL            | _null_                     | &#x0F34; Bsdus Rtags                             |
| `U+0F35`  | Mark [Mn]        | SYLLABLE_MODIFIER | BOTTOM_POSITION            | &#x0F35; Ngas Bzung Nyi Zla                      |
| `U+0F36`  | Symbol           | SYMBOL            | _null_                     | &#x0F36; Caret -Dzud Rtags Bzhi Mig Can          |
| `U+0F37`  | Mark [Mn]        | SYLLABLE_MODIFIER | BOTTOM_POSITION            | &#x0F37; Ngas Bzung Sgor Rtags                   |
| `U+0F38`  | Symbol           | SYMBOL            | _null_                     | &#x0F38; Che Mgo                                 |
| `U+0F39`  | Mark [Mn]        | NUKTA             | TOP_POSITION               | &#x0F39; Tsa -Phru                               |
| `U+0F3A`  | Punctuation [Ps] | _null_            | _null_                     | &#x0F3A; Gug Rtags Gyon                          |
| `U+0F3B`  | Punctuation [Pe] | _null_            | _null_                     | &#x0F3B; Gug Rtags Gyas                          |
| `U+0F3C`  | Punctuation [Ps] | _null_            | _null_                     | &#x0F3C; Ang Khang Gyon                          |
| `U+0F3D`  | Punctuation [Pe] | _null_            | _null_                     | &#x0F3D; Ang Khang Gyas                          |
| `U+0F3E`  | Mark [Mc]        | VOWEL_DEPENDENT   | RIGHT_POSITION             | &#x0F3E; Sign Yar Tshes                          |
| `U+0F3F`  | Mark [Mc]        | VOWEL_DEPENDENT   | LEFT_POSITION              | &#x0F3F; Sign Mar Tshes                          |
| | | | | |
| `U+0F40`  | Letter           | CONSONANT         | _null_                     | &#x0F40; Ka                                      |
| `U+0F41`  | Letter           | CONSONANT         | _null_                     | &#x0F41; Kha                                     |
| `U+0F42`  | Letter           | CONSONANT         | _null_                     | &#x0F42; Ga                                      |
| `U+0F43`  | Letter           | CONSONANT         | _null_                     | &#x0F43; Gha                                     |
| `U+0F44`  | Letter           | CONSONANT         | _null_                     | &#x0F44; Nga                                     |
| `U+0F45`  | Letter           | CONSONANT         | _null_                     | &#x0F45; Ca                                      |
| `U+0F46`  | Letter           | CONSONANT         | _null_                     | &#x0F46; Cha                                     |
| `U+0F47`  | Letter           | CONSONANT         | _null_                     | &#x0F47; Ja                                      |
| `U+0F48`  | _unassigned_     |                   |                            |                                                  |
| `U+0F49`  | Letter           | CONSONANT         | _null_                     | &#x0F49; Nya                                     |
| `U+0F4A`  | Letter           | CONSONANT         | _null_                     | &#x0F4A; Tta                                     |
| `U+0F4B`  | Letter           | CONSONANT         | _null_                     | &#x0F4B; Ttha                                    |
| `U+0F4C`  | Letter           | CONSONANT         | _null_                     | &#x0F4C; Dda                                     |
| `U+0F4D`  | Letter           | CONSONANT         | _null_                     | &#x0F4D; Ddha                                    |
| `U+0F4E`  | Letter           | CONSONANT         | _null_                     | &#x0F4E; Nna                                     |
| `U+0F4F`  | Letter           | CONSONANT         | _null_                     | &#x0F4F; Ta                                      |
| | | | | |
| `U+0F50`  | Letter           | CONSONANT         | _null_                     | &#x0F50; Tha                                     |
| `U+0F51`  | Letter           | CONSONANT         | _null_                     | &#x0F51; Da                                      |
| `U+0F52`  | Letter           | CONSONANT         | _null_                     | &#x0F52; Dha                                     |
| `U+0F53`  | Letter           | CONSONANT         | _null_                     | &#x0F53; Na                                      |
| `U+0F54`  | Letter           | CONSONANT         | _null_                     | &#x0F54; Pa                                      |
| `U+0F55`  | Letter           | CONSONANT         | _null_                     | &#x0F55; Pha                                     |
| `U+0F56`  | Letter           | CONSONANT         | _null_                     | &#x0F56; Ba                                      |
| `U+0F57`  | Letter           | CONSONANT         | _null_                     | &#x0F57; Bha                                     |
| `U+0F58`  | Letter           | CONSONANT         | _null_                     | &#x0F58; Ma                                      |
| `U+0F59`  | Letter           | CONSONANT         | _null_                     | &#x0F59; Tsa                                     |
| `U+0F5A`  | Letter           | CONSONANT         | _null_                     | &#x0F5A; Tsha                                    |
| `U+0F5B`  | Letter           | CONSONANT         | _null_                     | &#x0F5B; Dza                                     |
| `U+0F5C`  | Letter           | CONSONANT         | _null_                     | &#x0F5C; Dzha                                    |
| `U+0F5D`  | Letter           | CONSONANT         | _null_                     | &#x0F5D; Wa                                      |
| `U+0F5E`  | Letter           | CONSONANT         | _null_                     | &#x0F5E; Zha                                     |
| `U+0F5F`  | Letter           | CONSONANT         | _null_                     | &#x0F5F; Za                                      |
| | | | | |
| `U+0F60`  | Letter           | CONSONANT         | _null_                     | &#x0F60; -A                                      |
| `U+0F61`  | Letter           | CONSONANT         | _null_                     | &#x0F61; Ya                                      |
| `U+0F62`  | Letter           | CONSONANT         | _null_                     | &#x0F62; Ra                                      |
| `U+0F63`  | Letter           | CONSONANT         | _null_                     | &#x0F63; La                                      |
| `U+0F64`  | Letter           | CONSONANT         | _null_                     | &#x0F64; Sha                                     |
| `U+0F65`  | Letter           | CONSONANT         | _null_                     | &#x0F65; Ssa                                     |
| `U+0F66`  | Letter           | CONSONANT         | _null_                     | &#x0F66; Sa                                      |
| `U+0F67`  | Letter           | CONSONANT         | _null_                     | &#x0F67; Ha                                      |
| `U+0F68`  | Letter           | CONSONANT         | _null_                     | &#x0F68; A                                       |
| `U+0F69`  | Letter           | CONSONANT         | _null_                     | &#x0F69; Kssa                                    |
| `U+0F6A`  | Letter           | CONSONANT         | _null_                     | &#x0F6A; Fixed-Form Ra                           |
| `U+0F6B`  | Letter           | CONSONANT         | _null_                     | &#x0F6B; Kka                                     |
| `U+0F6C`  | Letter           | CONSONANT         | _null_                     | &#x0F6C; Rra                                     |
| `U+0F6D`  | _unassigned_     |                   |                            |                                                  |
| `U+0F6E`  | _unassigned_     |                   |                            |                                                  |
| `U+0F6F`  | _unassigned_     |                   |                            |                                                  |
| | | | | |
| `U+0F70`  | _unassigned_     |                   |                            |                                                  |
| `U+0F71`  | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0F71; Sign Aa                                 |
| `U+0F72`  | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0F72; Sign I                                  |
| `U+0F73`  | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_AND_BOTTOM_POSITION    | &#x0F73; Sign Ii                                 |
| `U+0F74`  | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0F74; Sign U                                  |
| `U+0F75`  | Mark [Mn]        | VOWEL_DEPENDENT   | BOTTOM_POSITION            | &#x0F75; Sign Uu                                 |
| `U+0F76`  | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_AND_BOTTOM_POSITION    | &#x0F76; Sign Vocalic R                          |
| `U+0F77`  | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_AND_BOTTOM_POSITION    | &#x0F77; Sign Vocalic Rr                         |
| `U+0F78`  | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_AND_BOTTOM_POSITION    | &#x0F78; Sign Vocalic L                          |
| `U+0F79`  | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_AND_BOTTOM_POSITION    | &#x0F79; Sign Vocalic Ll                         |
| `U+0F7A`  | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0F7A; Sign E                                  |
| `U+0F7B`  | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0F7B; Sign Ee                                 |
| `U+0F7C`  | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0F7C; Sign O                                  |
| `U+0F7D`  | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0F7D; Sign Oo                                 |
| `U+0F7E`  | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0F7E; Sign Rjes Su Nga Ro                     |
| `U+0F7F`  | Mark [Mc]        | VISARGA           | RIGHT_POSITION             | &#x0F7F; Sign Rnam Bcad                          |
| | | | | |
| `U+0F80`  | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_POSITION               | &#x0F80; Sign Reversed I                         |
| `U+0F81`  | Mark [Mn]        | VOWEL_DEPENDENT   | TOP_AND_BOTTOM_POSITION    | &#x0F81; Sign Reversed Ii                        |
| `U+0F82`  | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0F82; Sign Nyi Zla Naa Da                     |
| `U+0F83`  | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0F83; Sign Sna Ldan                           |
| `U+0F84`  | Mark [Mn]        | VIRAMA            | BOTTOM_POSITION            | &#x0F84; Halanta                                 |
| `U+0F85`  | Punctuation      | AVAGRAHA          | _null_                     | &#x0F85; Paluta                                  |
| `U+0F86`  | Mark [Mn]        | TONE_MARKER       | TOP_POSITION               | &#x0F86; Sign Lci Rtags                          |
| `U+0F87`  | Mark [Mn]        | TONE_MARKER       | TOP_POSITION               | &#x0F87; Sign Yang Rtags                         |
| `U+0F88`  | Letter           | CONSONANT_HEAD    | _null_                     | &#x0F88; Sign Lce Tsa Can                        |
| `U+0F89`  | Letter           | CONSONANT_HEAD    | _null_                     | &#x0F89; Sign Mchu Can                           |
| `U+0F8A`  | Letter           | CONSONANT_HEAD    | _null_                     | &#x0F8A; Sign Gru Can Rgyings                    |
| `U+0F8B`  | Letter           | CONSONANT_HEAD    | _null_                     | &#x0F8B; Sign Gru Med Rgyings                    |
| `U+0F8C`  | Letter           | CONSONANT_HEAD    | _null_                     | &#x0F8C; Sign Inverted Mchu Can                  |
| `U+0F8D`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F8D; Subjoined Sign Lce Tsa Can              |
| `U+0F8E`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F8E; Subjoined Sign Mchu Can                 |
| `U+0F8F`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F8F; Subjoined Sign Inverted Mchu Can        |
| | | | | |
| `U+0F90`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F90; Subjoined Ka                            |
| `U+0F91`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F91; Subjoined Kha                           |
| `U+0F92`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F92; Subjoined Ga                            |
| `U+0F93`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F93; Subjoined Gha                           |
| `U+0F94`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F94; Subjoined Nga                           |
| `U+0F95`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F95; Subjoined Ca                            |
| `U+0F96`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F96; Subjoined Cha                           |
| `U+0F97`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F97; Subjoined Ja                            |
| `U+0F98`  | _unassigned_     |                   |                            |                                                  |
| `U+0F99`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F99; Subjoined Nya                           |
| `U+0F9A`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F9A; Subjoined Tta                           |
| `U+0F9B`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F9B; Subjoined Ttha                          |
| `U+0F9C`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F9C; Subjoined Dda                           |
| `U+0F9D`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F9D; Subjoined Ddha                          |
| `U+0F9E`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F9E; Subjoined Nna                           |
| `U+0F9F`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0F9F; Subjoined Ta                            |
| | | | | |
| `U+0FA0`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FA0; Subjoined Tha                           |
| `U+0FA1`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FA1; Subjoined Da                            |
| `U+0FA2`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FA2; Subjoined Dha                           |
| `U+0FA3`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FA3; Subjoined Na                            |
| `U+0FA4`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FA4; Subjoined Pa                            |
| `U+0FA5`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FA5; Subjoined Pha                           |
| `U+0FA6`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FA6; Subjoined Ba                            |
| `U+0FA7`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FA7; Subjoined Bha                           |
| `U+0FA8`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FA8; Subjoined Ma                            |
| `U+0FA9`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FA9; Subjoined Tsa                           |
| `U+0FAA`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FAA; Subjoined Tsha                          |
| `U+0FAB`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FAB; Subjoined Dza                           |
| `U+0FAC`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FAC; Subjoined Dzha                          |
| `U+0FAD`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FAD; Subjoined Wa                            |
| `U+0FAE`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FAE; Subjoined Zha                           |
| `U+0FAF`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FAF; Subjoined Za                            |
| | | | | |
| `U+0FB0`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FB0; Subjoined -A                            |
| `U+0FB1`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FB1; Subjoined Ya                            |
| `U+0FB2`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FB2; Subjoined Ra                            |
| `U+0FB3`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FB3; Subjoined La                            |
| `U+0FB4`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FB4; Subjoined Sha                           |
| `U+0FB5`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FB5; Subjoined Ssa                           |
| `U+0FB6`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FB6; Subjoined Sa                            |
| `U+0FB7`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FB7; Subjoined Ha                            |
| `U+0FB8`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FB8; Subjoined A                             |
| `U+0FB9`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FB9; Subjoined Kssa                          |
| `U+0FBA`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FBA; Subjoined Fixed-Form Wa                 |
| `U+0FBB`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FBB; Subjoined Fixed-Form Ya                 |
| `U+0FBC`  | Mark [Mn]        |CONSONANT_SUBJOINED| BOTTOM_POSITION            | &#x0FBC; Subjoined Fixed-Form Ra                 |
| `U+0FBD`  | _unassigned_     |                   |                            |                                                  |
| `U+0FBE`  | Symbol           | SYMBOL            | _null_                     | &#x0FBE; Ku Ru Kha                               |
| `U+0FBF`  | Symbol           | SYMBOL            | _null_                     | &#x0FBF; Ku Ru Kha Bzhi Mig Can                  |
| | | | | |
| `U+0FC0`  | Symbol           | SYMBOL            | _null_                     | &#x0FC0; Cantillation Sign Heavy Beat            |
| `U+0FC1`  | Symbol           | SYMBOL            | _null_                     | &#x0FC1; Cantillation Sign Light Beat            |
| `U+0FC2`  | Symbol           | SYMBOL            | _null_                     | &#x0FC2; Cantillation Sign Cang Te-U             |
| `U+0FC3`  | Symbol           | SYMBOL            | _null_                     | &#x0FC3; Cantillation Sign Sbub -Chal            |
| `U+0FC4`  | Symbol           | SYMBOL            | _null_                     | &#x0FC4; Symbol Dril Bu                          |
| `U+0FC5`  | Symbol           | SYMBOL            | _null_                     | &#x0FC5; Symbol Rdo Rje                          |
| `U+0FC6`  | Mark [Mn]        | SYLLABLE_MODIFIER | BOTTOM_POSITION            | &#x0FC6; Symbol Padma Gdan                       |
| `U+0FC7`  | Symbol           | SYMBOL            | _null_                     | &#x0FC7; Symbol Rdo Rje Rgya Gram                |
| `U+0FC8`  | Symbol           | SYMBOL            | _null_                     | &#x0FC8; Symbol Phur Pa                          |
| `U+0FC9`  | Symbol           | SYMBOL            | _null_                     | &#x0FC9; Symbol Nor Bu                           |
| `U+0FCA`  | Symbol           | SYMBOL            | _null_                     | &#x0FCA; Symbol Nor Bu Nyis -Khyil               |
| `U+0FCB`  | Symbol           | SYMBOL            | _null_                     | &#x0FCB; Symbol Nor Bu Gsum -Khyil               |
| `U+0FCC`  | Symbol           | SYMBOL            | _null_                     | &#x0FCC; Symbol Nor Bu Bzhi -Khyil               |
| `U+0FCD`  | _unassigned_     |                   |                            |                                                  |
| `U+0FCE`  | Symbol           | SYMBOL            | _null_                     | &#x0FCE; Sign Rdel Nag Rdel Dkar                 |
| `U+0FCF`  | Symbol           | SYMBOL            | _null_                     | &#x0FCF; Sign Rdel Nag Gsum                      |
| | | | | |
| `U+0FD0`  | Punctuation      | _null_            | _null_                     | &#x0FD0; Bska- Shog Gi Mgo Rgyan                 |
| `U+0FD1`  | Punctuation      | _null_            | _null_                     | &#x0FD1; Mnyam Yig Gi Mgo Rgyan                  |
| `U+0FD2`  | Punctuation      | _null_            | _null_                     | &#x0FD2; Nyis Tsheg                              |
| `U+0FD3`  | Punctuation      | _null_            | _null_                     | &#x0FD3; Initial Brda Rnying Yig Mgo Mdun        |
| `U+0FD4`  | Punctuation      | _null_            | _null_                     | &#x0FD4; Closing Brda Rnying Yig Mgo Sgab        |
| `U+0FD5`  | Symbol           | SYMBOL            | _null_                     | &#x0FD5; Right-Facing Svasti Sign                |
| `U+0FD6`  | Symbol           | SYMBOL            | _null_                     | &#x0FD6; Left-Facing Svasti Sign                 |
| `U+0FD7`  | Symbol           | SYMBOL            | _null_                     | &#x0FD7; Right-Facing Svasti Sign With Dots      |
| `U+0FD8`  | Symbol           | SYMBOL            | _null_                     | &#x0FD8; Left-Facing Svasti Sign With Dots       |
| `U+0FD9`  | Punctuation      | _null_            | _null_                     | &#x0FD9; Leading Mchan Rtags                     |
| `U+0FDA`  | Punctuation      | _null_            | _null_                     | &#x0FDA; Trailing Mchan Rtags                    |
| `U+0FDB`  | _unassigned_     |                   |                            |                                                  |
| `U+0FDC`  | _unassigned_     |                   |                            |                                                  |
| `U+0FDD`  | _unassigned_     |                   |                            |                                                  |
| `U+0FDE`  | _unassigned_     |                   |                            |                                                  |
| `U+0FDF`  | _unassigned_     |                   |                            |                                                  |
| | | | | |
:::


## Miscellaneous character table ##

Other important characters that may be encountered when shaping runs
of Tibetan text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.


:::{table} Miscellaneous character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                          |
|:----------|:-----------------|:------------------|:---------------------------|:-------------------------------|
|`U+00A0`   | Separator        | PLACEHOLDER       | _null_                     | &#x00A0; No-break space        |
|`U+200C`   | Other            | NON_JOINER        | _null_                     | &#x200C; Zero-width non-joiner |
|`U+200D`   | Other            | JOINER            | _null_                     | &#x200D; Zero-width joiner     |
|`U+25CC`   | Symbol           | DOTTED_CIRCLE     | _null_                     | &#x25CC; Dotted circle         |
|`U+2638`   | Symbol           | SYMBOL            | _null_                     | &#x2638; Wheel of Dharma       |
:::


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation of a conjunct
from a "_consonant_,Halant,_consonant_" sequence. The sequence
"_consonant_,Halant,ZWJ,_consonant_" blocks the formation of a
conjunct between the two consonants.

Note, however, that the "_consonant_,Halant" subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead. The sequence
"_consonant_,Halant,ZWNJ,_consonant_" should produce the first
consonant in its standard form, followed by an explicit "Halant".

A secondary usage of the zero-width joiner is to prevent the formation of
"Reph". An initial "Ra,Halant,ZWJ" sequence should not produce a "Reph",
where an initial "Ra,Halant" sequence without the zero-width joiner
otherwise would.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display
those codepoints that are defined as non-spacing (marks, dependent
vowels (matras), below-base consonant forms, and post-base consonant
forms) in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match "NBSP,ZWJ,Halant,_consonant_", "NBSP,_mark_", or "NBSP,_matra_".


================================================
FILE: character-tables/index.md
================================================
# Character tables #

This section includes per-script reference tables showing the
shaping-related properties of the codepoints used for each script,
as well as auxiliary information about the codepoints and notes on
control characters and other special-purpose codepoints that could
prove relevant to shaping-engine implementers.


  - Indic
      - [Devanagari](character-tables-devanagari.md)
      - [Bengali](character-tables-bengali.md)
      - [Gujarati](character-tables-gujarati.md)
      - [Gurmukhi](character-tables-gurmukhi.md)
      - [Kannada](character-tables-kannada.md)
      - [Malayalam](character-tables-malayalam.md)
      - [Oriya](character-tables-oriya.md)
      - [Tamil](character-tables-tamil.md)
      - [Telugu](character-tables-telugu.md)
      - [Sinhala](character-tables-sinhala.md)
	  - _Vedic Extensions tables are included in each Indic script_
  - Arabic
      - [Arabic](character-tables-arabic.md)
      - [Syriac](character-tables-syriac.md)
      - [N'Ko](character-tables-nko.md)
      - [Mongolian](character-tables-mongolian.md)
  - Hangul
      - [Hangul Jamo](character-tables-hangul.md)
  - Hebrew
      - [Hebrew](character-tables-hebrew.md)
  - Khmer
      - [Khmer](character-tables-khmer.md)
  - Lao
      - [Lao](character-tables-lao.md)
  - Myanmar
      - [Myanmar](character-tables-myanmar.md)
  - Thai
      - [Thai](character-tables-thai.md)
  - Tibetan
      - [Tibetan](character-tables-tibetan.md)


:::{note}
Tables are not provided for the default or Universal Shaping Engine
(<abbr>USE</abbr>) shaping documents, each of which covers a
multitude of individual scripts, nor for the emoji shaping document,
because emoji usage is not specific to any individual script.
:::


================================================
FILE: conf.py
================================================
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
import sys
from pathlib import Path

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

# Note: the project name contains literal <br> markup between its words.
project = 'OpenType<br>Shaping<br>Documents'
author = 'Sponsored by YesLogic'
copyright = '2022, Sponsored by YesLogic'

# Short version string and the full release identifier.
version = "0.9"
release = "0.9alpha1"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

# Put the local '_ext' directory on the import path so that the extension
# modules stored there can be loaded by name from the 'extensions' list.
sys.path.append(str(Path('_ext').resolve()))

extensions = [
    'myst_parser',
    'sphinx_external_toc',
    'sphinx_inline_svg',
    'shapingdocs_svg_color_toggles',
]

# All document sources are Markdown files.
source_suffix = {'.md': 'markdown'}

templates_path = ['_templates']

# Paths and glob patterns that are left out of the build.
# Eventually need to remove the links to image-generation-logs from the
# root README.md
exclude_patterns = [
    '_build',
    '_ext',
    'test',
    'Thumbs.db',
    '.DS_Store',
    'BUILD.md',
    'README.md',
    '**-image-generation-log.md',
    'character-tables/README.md',
    'images/images-index.md',
    'images/README.md',
    'notes/README.md',
]

# Renamed to split GitHub README from production index
root_doc = 'index'

# Figure/table numbering.
numfig = True
numfig_secnum_depth = 2

# Generate heading anchors for heading levels 1 through 6.
myst_heading_anchors = 6

# attrs_inline to specify HTML element attributes like img 'title' that are
# getting lost on build.
myst_enable_extensions = [
    'substitution',
    'smartquotes',
    'colon_fence',
    'attrs_inline',
]

# Text snippets available to the documents through the MyST 'substitution'
# extension enabled above.
myst_substitutions = dict(
    opentogglebutton='<br><button onclick="toggleColor(',
    closetogglebutton=')">Substitution Toggle cluster colors</button><br>',
    khmer_midsyllable_mark_table_workaround='Mid-syllable marks that must be tagged for sorting with above-base consonants',
)

external_toc_path = "_toc.yml"

# Starting with sphinx_external_toc 1.1.0, "multitoc numbering" is activated
# by this configuration key and the standalone sphinx_multitoc_numbering
# extension is not required
use_multitoc_numbering = True

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = 'alabaster'
html_static_path = ['_static']
html_js_files = ['toggleSvgColors.js']

# Sidebar templates applied to every page ('**' matches all documents).
html_sidebars = {
    '**': [
        'about.html',
        'static_nav.html',
#        'navigation.html', # Replacing default navigation with static version above
        'searchbox.html',
        'sourcelink.html',
        ]
    }

# The font-family values deliberately contain a backslash before each space.
# They are written as raw strings because '\ ' is not a valid Python escape
# sequence: in a normal string literal it raises a DeprecationWarning
# (SyntaxWarning on Python 3.12+) and is slated to become an error. The
# resulting string values are unchanged.
html_theme_options = {
    'page_width': '1200px',
    'sidebar_width': '300px',
    'github_user': 'n8willis',
    'github_repo': 'opentype-shaping-documents',
    'font_family': r'Source\ Serif\ 4',
    'head_font_family': r'Source\ Serif\ 4',
    'caption_font_family': r'Source\ Serif\ 4',
    'code_font_family': r'Source\ Code\ Pro',
    'github_button': True,
    'github_type': 'watch',
    'github_count': True,
    'extra_nav_links': {
        'GitHub issues': 'https://github.com/n8willis/opentype-shaping-documents/issues',
        'Build process': 'https://github.com/n8willis/opentype-shaping-documents/blob/master/BUILD.md', # Fix the directory path after PR merge; Add contributor-guide link
        }
}


================================================
FILE: errata.md
================================================
# OpenType shaping errata #

This document details errata that shaping engines may encounter, such
as ambiguities or omissions in the existing OpenType or Unicode
specification documents.


**Contents**

  - [Unicode](#unicode)
      - [<abbr>ZWJ</abbr> and <abbr>ZWNJ</abbr>](#zwj-and-zwnj)
	      - [Scope of <abbr>ZWJ</abbr> and <abbr>ZWNJ</abbr>](#scope-of-zwj-and-zwnj)
	      - [<abbr>ZWJ</abbr> in redundant ligature lookups](#zwj-in-redundant-ligature-lookups)
      - [Emoji](#emoji)
	      - [Skin-tone permutations](#skin-tone-permutations)
		  - [Gender permutations](#gender-permutations)
  - [OpenType](#opentype)
      - [Null offsets in <abbr>GSUB</abbr> and <abbr>GPOS</abbr>](#null-offsets-in-gsub-and-gpos)
      - [Sorting of <abbr>GSUB</abbr> and <abbr>GPOS</abbr> lookups](#sorting-of-gsub-and-gpos-lookups)
	  - [Per-script applicability of feature tags](#per-script-applicability-of-feature-tags)
      - [Ordering of post-base and below-base consonants in Indic2 base-consonant determination](#ordering-of-post-base-and-below-base-consonants-in-indic2-base-consonant-determination)
      - [Lookup behavior](#lookup-behavior)
          - [Using MultipleSub for glyph deletion](#using-multiplesub-for-glyph-deletion)
		  - [Processing nested contextual lookups](#processing-nested-contextual-lookups)
      - [Adjacent-mark reordering ambiguities](#adjacent-mark-reordering-ambiguities)
      - [Merging of glyph properties](#merging-of-glyph-properties)
  - [See also](#see-also)

  
## Unicode ##

This section lists errata pertaining to the Unicode Standard.

### <abbr>ZWJ</abbr> and <abbr>ZWNJ</abbr> ###

#### Scope of <abbr>ZWJ</abbr> and <abbr>ZWNJ</abbr> ####

Unicode provides the Zero Width Joiner (<abbr>ZWJ</abbr>) and Zero Width Non-Joiner
(<abbr>ZWNJ</abbr>) control characters so that a text sequence can "request a
rendering system to have more or less of a connection between
characters than they would otherwise have."

The generic examples used in the standard show how <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>
characters can affect the cursive-joining behavior between two
characters or the ligature-forming behavior between two
characters. However, the standard does not explicitly say whether or
not the presence of a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should influence the shaping
behavior of characters that are not adjacent to the <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

For example, in the sequence <samp>"a,b,ZWNJ,c,d"</samp> the <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should prevent
the application of a ligature between <samp>"b"</samp> and <samp>"c"</samp> (if such a ligature
lookup exists in the active font).

However, if the active font contains a contextual ligature lookup for
<samp>"c,d"</samp> when preceded by <samp>"b"</samp>, it is not clear whether or not the <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>
in the same <samp>"a,b,ZWNJ,c,d"</samp> sequence should inhibit the application of
the ligature between <samp>"c"</samp> and <samp>"d"</samp>.


#### <abbr>ZWJ</abbr> in redundant ligature lookups ####

An "Implementation Notes" section in chapter 23.2 of the Unicode
Standard says that font vendors should add <abbr title="Zero-Width Joiner">ZWJ</abbr> sequences to ligature
lookups. For example, if the sequence <samp>"f,i"</samp> triggers the <samp>"fi"</samp>
ligature, then the font should also include a lookup that triggers the
<samp>"fi"</samp> ligature for <samp>"f,ZWJ,i"</samp>. 

However, the text of chapter 23.2 prior to the "Implementation Notes"
says that <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> "are not to be used in all cases where
ligatures or cursive connections are desired; instead, they are meant
only for over-riding the normal behavior of the text." That logic
makes the suggested <samp>"f,ZWJ,i"</samp> ligature lookup superfluous, because it
duplicates the effects of the existing <samp>"f,i"</samp> ligature lookup.

Using <abbr title="Zero-Width Joiner">ZWJ</abbr> within lookup patterns in the manner suggested by the
"Implementation Notes" is not common practice. 

### Emoji ###

#### Skin-tone permutations ####

It is unclear whether <abbr title="Zero-Width Joiner">ZWJ</abbr> multi-person group emoji sequences are
expected to include combinations where some emoji in the sequence are
followed by a Fitzpatrick skin-tone modifier but other emoji in the
sequence are not followed by a Fitzpatrick skin-tone modifier.

For example, it is unclear whether the sequence
<samp>"Man,ZWJ,Handshake,Man,SkinTone-2"</samp> constitutes a valid
<abbr title="Zero-Width Joiner">ZWJ</abbr> "Couple holding hands" sequence.


#### Gender permutations ####

It is unclear whether <abbr title="Zero-Width Joiner">ZWJ</abbr> multi-person group emoji sequences are
expected to include combinations where some emoji in the sequence
are an explicit gender but other emoji in the sequence are not an
explicit gender.

For example, it is unclear whether the sequence
<samp>"Man,ZWJ,Handshake,Person"</samp> constitutes a valid
<abbr title="Zero-Width Joiner">ZWJ</abbr> "Couple holding hands" sequence.

It is also unclear whether the <abbr title="Zero-Width Joiner">ZWJ</abbr> multi-person family sequence must
have explicit gender-ordering for the adult humans depicted.

For example, it is unclear whether the sequence
<samp>"Man,ZWJ,Woman,ZWJ,Girl"</samp> should be rendered identically to the
sequence <samp>"Woman,ZWJ,Man,ZWJ,Girl"</samp>.


## OpenType ##

This section lists errata pertaining to the OpenType specification.

### Null offsets in <abbr>GSUB</abbr> and <abbr>GPOS</abbr> ###

The headers of the <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> tables include fields that contain
the offsets at which other structures within the font binary are
found. For example, the value of the `featureVariationsOffset` field
indicates the byte value at which the featureVariations structure is
located.

The OpenType specification notes that `featureVariationsOffset` can be
`NULL`, but the specification does not indicate whether or not any other
offset values can also be `NULL` (nor, conversely, does it indicate
whether `NULL` should be considered invalid).

In practice, other fields -- such as `scriptListOffset`,
`featureListOffset`, and `lookupListOffset` -- may have `NULL` values.
In such situations, `NULL` is usually interpreted as meaning that the
structure nominally pointed to by the offset is empty.

Furthermore, font-validation functions may overwrite a `NULL` into an
offset field if the original value encountered was invalid.


### Sorting of <abbr>GSUB</abbr> and <abbr>GPOS</abbr> lookups ###

The OpenType specification requires that lookups in the <abbr title="Glyph Substitution table">GSUB</abbr> table
must be sorted into numeric order before they are applied.

Lookups in the <abbr title="Glyph Positioning table">GPOS</abbr> table, however, are not expected to be sorted
first, because <abbr title="Glyph Positioning table">GPOS</abbr> lookups are applied in a specified order.

### Per-script applicability of feature tags ###

Some OpenType feature tags are defined only to apply to text runs in
specific scripts. Other feature tags are defined to apply to text in
any script.

However, the definitions of some feature tags list a limited number of
example scripts to which the feature should apply, but do not specify
every supported script.

For example, the `pstf` (post-base forms) tag is
[described](https://docs.microsoft.com/en-us/typography/opentype/spec/features_pt#tag-pstf)
as required for "scripts of south and southeast Asia that have
post-base forms for consonants eg: Gurmukhi, Malayalam, Khmer."


### Ordering of post-base and below-base consonants in Indic2 base-consonant determination ###

The Microsoft script-development specification for all Indic2-model
scripts
[states](https://docs.microsoft.com/en-us/typography/script-development/bengali#reorder-characters)
parenthetically that "post-base forms have to follow below-base forms". 

If this statement is taken to be a rule, it would affect the
base-consonant search algorithm.

For example, in the Bengali sequence <samp>"Ka,Halant,Ba,Halant,Ya"</samp>
(`U+0995`,`U+09CD`,`U+09AC`,`U+09CD`,`U+09AF`), <samp>"Ka"</samp> would be
identified as the syllable base, with <samp>"Ba"</samp> designated a below-base
form and <samp>"Ya"</samp> designated a post-base form. However, in the similar
sequence <samp>"Ka,Halant,Ya,Halant,Ba"</samp>
(`U+0995`,`U+09CD`,`U+09AF`,`U+09CD`,`U+09AC`), <samp>"Ya"</samp> would be
identified as the base consonant.

Real-world Bengali texts provide counterexamples that contradict the
assumption that "post-base forms follow below-base forms" is a
requirement.

In other scripts, such as Telugu, the "post-base forms have to follow
below-base forms" statement is, perhaps, statistically likely, but is
certainly not an orthographic rule.

Consequently, it is unclear if the statement should be enforced as a
rule or if it should be regarded as a suggestion, and it is unclear to
what degree that answer varies between the Indic2-model scripts.


### Lookup behavior ###

#### Using MultipleSub for glyph deletion ####

The <abbr title="Glyph Substitution table">GSUB</abbr> specification says that a `MultipleSubst` substitution cannot
be used to delete a glyph: it always substitutes at least one
replacement glyph. However, some implementations allow the
replacement-glyph array to be zero-length. 

#### Processing nested contextual lookups ####

The <abbr title="Glyph Substitution table">GSUB</abbr> specification allows contextual substitutions to invoke other
contextual substitutions. It is unclear how implementations ought to
handle certain cases of these nested lookups.

For example:
```
context: 'a'
subst index 0:
  context: 'ab'
  subst index 1: 'b' → 'ab'
```

This nested set of substitutions could cause an infinite loop on
certain input strings, if it is interpreted in a naive manner:
```
'[]ab' // begin at start of glyph sequence
'[a]b' // context matches
'[ab]' // nested context matches at index 0
'[aab]' // subst applies at index 1
'[a]ab' // return to parent context, uh oh!
'a[]ab' // move on to next glyph
'a[a]b' // context matches, infinite loop!
```

In short, if a nested contextual substitution can insert glyphs ahead
of its parent contextual substitution's context, then it creates a
"stack" that allows Turing-complete computation.


### Adjacent-mark reordering ambiguities ###

The Microsoft script-development specifications
[say](https://docs.microsoft.com/en-us/typography/script-development/devanagari#reorder-characters)
that marks should be reordered "to canonical order" (step 3 in the
linked Devanagari document) in the reordering phase. However, the same
step also describes this step as "Adjacent nukta and halant or nukta
and Vedic sign are always repositioned if necessary, so that the nukta
is first."

Together, it is somewhat ambiguous as to whether only <samp>"Halant,Nukta"</samp>
and <samp>"_Vedic_sign_,Nukta"</samp> sequences should be reordered by moving the
<samp>"Nukta"</samp> to the beginning, or all sequences of marks require reordering
into Unicode canonical combining class order, with <samp>"Nukta"</samp> moving to
the initial position as a special case.


### Merging of glyph properties ###

When the application of a shaping operation merges two or more
adjacent glyphs (for example, when two adjacent glyphs are substituted
with a single ligature glyph), the OpenType specification does not
dictate how shaping engines should combine (for example, merge,
replace, or drop) the properties of the input glyphs to determine the
properties of the output glyph.

This may result in ambiguities when a sequence of glyphs has several
substitutions applied in series.

For example, when shaping Indic scripts, glyphs may be tagged for the
possible application of multiple features, such as `half` and `rkrf`,
which are applied serially.

HarfBuzz and Uniscribe both take the approach of retaining the
properties of the first input glyph in a sequence, propagating those
properties to the merged output glyph.


## See also ##

Shaping engines may also want to offer explicit compatibility with
Microsoft Uniscribe, for the purpose of ensuring that users' existing
documents do not break. Therefore, implementors may wish to consult
the [Uniscribe compatibility notes](notes/uniscribe-bug-compatibility.md).

These compatibility notes record test-driven observations about
Uniscribe's behavior, and they include any behavior that is a known
bug or a known deviation from specifications. Consequently, the issues
raised by offering Uniscribe compatibility cannot be considered errata
in the sense described above.


================================================
FILE: images/arabic/arabic-png-image-generation-log.md
================================================
# Commands used to generate the <abbr>PNG</abbr> images in [opentype-shaping-arabic.md](/opentype-shaping-arabic.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## 4.1 `locl`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-locl-before.png --features=-locl --language=urd --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=06f4

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-locl-after.png --features=+locl --language=urd --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=06f4

montage arabic-locl-before.png right-arrow.png arabic-locl-after.png -geometry +0+0 -background transparent arabic-locl.png


## 4.2 `isol`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-isol-before.png --features=-isol --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0647

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-isol-after.png --features=+isol --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0647

montage arabic-isol-before.png right-arrow.png arabic-isol-after.png -geometry +0+0 -background transparent arabic-isol.png


## 4.3 `fina`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-fina-before.png --features=-fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=25cc,0628

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-fina-after.png --features=+fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=25cc,0628

montage arabic-fina-before.png right-arrow.png arabic-fina-after.png -geometry +0+0 -background transparent arabic-fina.png


## 4.6 `medi`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-medi-before.png --features=-medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=25cc,062e,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-medi-after.png --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=25cc,062e,25cc

montage arabic-medi-before.png right-arrow.png arabic-medi-after.png -geometry +0+0 -background transparent arabic-medi.png


## 4.8 `init`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-init-before.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=063a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-init-after.png --features=+init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=063a,25cc

montage arabic-init-before.png right-arrow.png arabic-init-after.png -geometry +0+0 -background transparent arabic-init.png


## 4.9 `rlig`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-rlig-before.png --features=-rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=0644,0623

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-rlig-after.png --features=+rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=0644,0623

montage arabic-rlig-before.png right-arrow.png arabic-rlig-after.png -geometry +0+0 -background transparent arabic-rlig.png


## 4.10 `rclt`

> None found.


## 4.11 `calt`

> Note: Noto Nastaliq Urdu implements this as a `rlig` lookup for
> unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-calt-before.png --features=-liga,-rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=062d,0645

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-calt-after.png --features=+liga,+rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=062d,0645

montage arabic-calt-before.png right-arrow.png arabic-calt-after.png -geometry +0+0 -background transparent arabic-calt.png


## 5.1 `liga`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-liga-before.png --features=-liga,-fina,-medi,-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoKufiArabic-Regular.ttf --unicodes=0631,064a,0627,0644

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-liga-after.png --features=+liga --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoKufiArabic-Regular.ttf --unicodes=0631,064a,0627,0644

montage arabic-liga-before.png right-arrow.png arabic-liga-after.png -geometry +0+0 -background transparent arabic-liga.png


## 5.3 `cswh`

> None found.


## 5.4 `mset`

> None found. Could be emulated with `mark`, however.


## 7.1 `curs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-curs-before.png --features=-curs --language=urd --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0642,0633,0645

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-curs-after.png --features=+curs --language=urd --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0642,0633,0645

montage arabic-curs-before.png right-arrow.png arabic-curs-after.png -geometry +0+0 -background transparent arabic-curs.png


## 7.3 `mark`

hb-view --font-size=110 --margin=2,32,2,32 --output-file=arabic-mark-before.png --features=-mark  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0643,0653

hb-view --font-size=110 --margin=2,32,2,16 --output-file=arabic-mark-after.png --features=+mark  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0643,0653

montage arabic-mark-before.png right-arrow.png arabic-mark-after.png -geometry +0+0 -background transparent arabic-mark.png


================================================
FILE: images/arabic/arabic-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-arabic.md](../../opentype-shaping-arabic.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/truetype/gentiumplus/GentiumPlus-Regular.ttf --unicodes=2192

## 2 `ccmp`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-ccmp-before.svg --features=-ccmp --language=urd --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=067e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-ccmp-after.svg --features=+ccmp --language=urd --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=067e

svg_stack.py --direction=h arabic-ccmp-before.svg right-arrow.svg arabic-ccmp-after.svg > arabic-ccmp.svg


## 4.1 `locl`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-locl-before.svg --features=-locl --language=urd --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=06f4

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-locl-after.svg --features=+locl --language=urd --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=06f4

svg_stack.py --direction=h arabic-locl-before.svg right-arrow.svg arabic-locl-after.svg > arabic-locl.svg


## 4.2 `isol`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-isol-before.svg --features=-isol --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0647

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-isol-after.svg --features=+isol --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0647

svg_stack.py --direction=h arabic-isol-before.svg right-arrow.svg arabic-isol-after.svg > arabic-isol.svg


## 4.3 `fina`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-fina-before.svg --features=-fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=0626,200d,0628

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-fina-after.svg --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=0626,0628

svg_stack.py --direction=h arabic-fina-before.svg right-arrow.svg arabic-fina-after.svg > arabic-fina.svg


## 4.6 `medi`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-medi-before.svg --features=-medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=0626,200d,062e,200d,0637

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-medi-after.svg --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=0626,062e,0637

svg_stack.py --direction=h arabic-medi-before.svg right-arrow.svg arabic-medi-after.svg > arabic-medi.svg


## 4.8 `init`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-init-before.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=063a,200d,0626

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-init-after.svg --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=063a,0626

svg_stack.py --direction=h arabic-init-before.svg right-arrow.svg arabic-init-after.svg > arabic-init.svg


## 4.9 `rlig`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-rlig-before.svg --features=-rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=0644,0623

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-rlig-after.svg --features=+rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=0644,0623

svg_stack.py --direction=h arabic-rlig-before.svg right-arrow.svg arabic-rlig-after.svg > arabic-rlig.svg


## 4.10 `rclt`

> None found.


## 4.11 `calt`

> Note: Noto Nastaliq Urdu implements this as a `rlig` lookup for
> unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-calt-before.svg --features=-liga,-rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=062d,0645

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-calt-after.svg --features=+liga,+rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=062d,0645

svg_stack.py --direction=h arabic-calt-before.svg right-arrow.svg arabic-calt-after.svg > arabic-calt.svg


## 5.1 `liga`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-liga-before.svg --features=-liga,-fina,-medi,-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=0627,0644,0644,0651,0670,06c1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-liga-after.svg --features=+liga --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=0627,0644,0644,0651,0670,06c1

svg_stack.py --direction=h arabic-liga-before.svg right-arrow.svg arabic-liga-after.svg > arabic-liga.svg


## 5.2 `dlig`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-dlig-before.svg --features=-dlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=0644,0644,0647

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-dlig-after.svg --features=+dlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNaskhArabic-Regular.ttf --unicodes=0644,0644,0647

svg_stack.py --direction=h arabic-dlig-before.svg right-arrow.svg arabic-dlig-after.svg > arabic-dlig.svg


## 5.3 `cswh`

> None found.


## 5.4 `mset`

> None found. Could be emulated with `mark`, however.


## 7.1 `curs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-curs-before.svg --features=-curs --language=urd --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0642,0633,0645

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-curs-after.svg --features=+curs --language=urd --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0642,0633,0645

svg_stack.py --direction=h arabic-curs-before.svg right-arrow.svg arabic-curs-after.svg > arabic-curs.svg


## 7.2 `dist` (not yet added to document)

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-dist-before.svg --features=-dist --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0628,062f,066e,062d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-dist-after.svg --features=+dist --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0628,062f,066e,062d

svg_stack.py --direction=h arabic-dist-before.svg right-arrow.svg arabic-dist-after.svg > arabic-dist.svg


## 7.2 `kern`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-kern-before.svg --features=-kern --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0635,062f,0627

hb-view --font-size=110 --margin=2,16,2,16 --output-file=arabic-kern-after.svg --features=+kern --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0635,062f,0627

svg_stack.py --direction=h arabic-kern-before.svg right-arrow.svg arabic-kern-after.svg > arabic-kern.svg


## 7.3 `mark`

hb-view --font-size=110 --margin=2,32,2,32 --output-file=arabic-mark-before.svg --features=-mark  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0643,0653

hb-view --font-size=110 --margin=2,32,2,16 --output-file=arabic-mark-after.svg --features=+mark  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoNastaliqUrdu-Regular.ttf --unicodes=0643,0653

svg_stack.py --direction=h arabic-mark-before.svg right-arrow.svg arabic-mark-after.svg > arabic-mark.svg


================================================
FILE: images/bengali/bengali-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-bengali.md](../../opentype-shaping-bengali.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


> Note: always use `--features=-init` in examples where the `init`
> feature itself is not being explained.


## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-matra-decompose-before.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-matra-decompose-after.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09c7,200c,25cc,09d7

montage bengali-matra-decompose-before.png right-arrow.png bengali-matra-decompose-after.png -geometry +0+0 -background transparent bengali-matra-decompose.png


## 2.7 Post-base consonants

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-yaphala-before.png --features=-init,-pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=25cc,09cd,09af

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-yaphala-after.png --features=-init,+pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=25cc,09cd,09af

montage bengali-yaphala-before.png right-arrow.png bengali-yaphala-after.png -geometry +0+0 -background transparent bengali-yaphala.png


## 3.2 `nukt`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-nukt-before.png --features=-init,-nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09a1,25cc,09bc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-nukt-after.png --features=-init,+nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09a1,09bc

montage bengali-nukt-before.png right-arrow.png bengali-nukt-after.png -geometry +0+0 -background transparent bengali-nukt.png


## 3.3 `akhn`

### KSsa

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-akhn-kssa-before.png --features=-init,-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0995,25cc,09cd,09b7

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-akhn-kssa-after.png --features=-init,+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0995,09cd,09b7

montage bengali-akhn-kssa-before.png right-arrow.png bengali-akhn-kssa-after.png -geometry +0+0 -background transparent bengali-akhn-kssa.png

### JNya

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-akhn-jnya-before.png --features=-init,-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099c,25cc,09cd,099e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-akhn-jnya-after.png --features=-init,+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099c,09cd,099e

montage bengali-akhn-jnya-before.png right-arrow.png bengali-akhn-jnya-after.png -geometry +0+0 -background transparent bengali-akhn-jnya.png


## 3.4 `rphf`

### Bengali

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-rphf-before.png --features=-init,-rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09b0,25cc,09cd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-rphf-after.png --features=-init,+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09b0,09cd,25cc

montage bengali-rphf-before.png right-arrow.png bengali-rphf-after.png -geometry +0+0 -background transparent bengali-rphf.png


### Assamese

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-rphf-as-before.png --features=-init,-rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09f0,25cc,09cd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-rphf-as-after.png --features=-init,+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09f0,09cd,25cc

montage bengali-rphf-as-before.png right-arrow.png bengali-rphf-as-after.png -geometry +0+0 -background transparent bengali-rphf-as.png

## 3.7 `blwf`

### Raphala

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-raphala-before.png --features=-init,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=25cc,09cd,09b0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-raphala-after.png --features=-init,+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=25cc,09cd,09b0

montage bengali-raphala-before.png right-arrow.png bengali-raphala-after.png -geometry +0+0 -background transparent bengali-raphala.png


### Baphala

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-baphala-before.png --features=-init,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=25cc,09cd,09ac

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-baphala-after.png --features=-init,+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=25cc,09cd,09ac

montage bengali-baphala-before.png right-arrow.png bengali-baphala-after.png -geometry +0+0 -background transparent bengali-baphala.png

## 3.9 `half`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-half-ka-before.png --features=-init,-half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0995,25cc,09cd,0998

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-half-ka-after.png --features=-init,+half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0995,09cd,0998

montage bengali-half-ka-before.png right-arrow.png bengali-half-ka-after.png -geometry +0+0 -background transparent bengali-half-ka.png

## 3.10 `pstf`

> Same as 2.7

## 3.11 `vatu`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-vatu-before.png --features=-init,-vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0995,25cc,09cd,09b0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-vatu-after.png --features=-init,+vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0995,09cd,09b0

montage bengali-vatu-before.png right-arrow.png bengali-vatu-after.png -geometry +0+0 -background transparent bengali-vatu.png


## 3.12 `cjct`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-cjct-before.png --features=-init,-cjct --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09aa,25cc,09cd,09a4,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-cjct-after.png --features=-init,+cjct --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09aa,09cd,09a4,25cc

montage bengali-cjct-before.png right-arrow.png bengali-cjct-after.png -geometry +0+0 -background transparent bengali-cjct.png


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-matra-position-before.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09c8,09b8,09cd,09ae,099a

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-matra-position-after.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09b8,09cd,09ae,099a,09c8

montage bengali-matra-position-before.png right-arrow.png bengali-matra-position-after.png -geometry +0+0 -background transparent bengali-matra-position.png


## 4.3 Reph position

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-reph-position-before.png --features=-init,+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09f0,09cd,25cc,09a1,09cd,09a1,09cd,0996,09c1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-reph-position-after.png --features=-init,+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09f0,09cd,09a1,09cd,09a1,09cd,0996,09c1

montage bengali-reph-position-before.png right-arrow.png bengali-reph-position-after.png -geometry +0+0 -background transparent bengali-reph-position.png

## 5 `init`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-init-before.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0999,09c7

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-init-after.png --features=+init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0999,09c7

montage bengali-init-before.png right-arrow.png bengali-init-after.png -geometry +0+0 -background transparent bengali-init.png

## 5 `pres`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-pres-before.png --features=-init,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099f,09bf

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-pres-after.png --features=-init,+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099f,09bf

montage bengali-pres-before.png right-arrow.png bengali-pres-after.png -geometry +0+0 -background transparent bengali-pres.png


## 5 `abvs`

> Note that Noto Bengali implements this feature in a pres lookup for
> unknown reasons.

hb-view --font-size=110 --margin=2,25,2,16 --output-file=bengali-abvs-before.png --features=-init,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09b0,09cd,25cc,09c0,0981

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-abvs-after.png --features=-init,+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09b0,09cd,25cc,09c0,0981

montage bengali-abvs-before.png right-arrow.png bengali-abvs-after.png -geometry +0+0 -background transparent bengali-abvs.png


## 5 `blws`

> Note that Noto Bengali implements this feature in a pres lookup for
> unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-blws-before.png --features=-init,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099d,09cd,09ac

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-blws-after.png --features=-init,+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099d,09cd,09ac

montage bengali-blws-before.png right-arrow.png bengali-blws-after.png -geometry +0+0 -background transparent bengali-blws.png


## 5 `psts`

> Note that Noto Bengali implements this feature in a pres lookup for
> unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-psts-before.png --features=-init,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09a0,09c0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-psts-after.png --features=-init,+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09a0,09c0

montage bengali-psts-before.png right-arrow.png bengali-psts-after.png -geometry +0+0 -background transparent bengali-psts.png


## 5 `haln`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-haln-before.png --features=-init,-haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099b,09bc,09cd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-haln-after.png --features=-init,+haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099b,09bc,09cd

montage bengali-haln-before.png right-arrow.png bengali-haln-after.png -geometry +0+0 -background transparent bengali-haln.png


## 6 `abvm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-abvm-before.png --features=-init,-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0994,0981

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-abvm-after.png --features=-init,+abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0994,0981

montage bengali-abvm-before.png right-arrow.png bengali-abvm-after.png -geometry +0+0 -background transparent bengali-abvm.png


## 6 `blwm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-blwm-after.png --features=-init,+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09ad,09c2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-blwm-before.png --features=-init,-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09ad,09c2

montage bengali-blwm-before.png right-arrow.png bengali-blwm-after.png -geometry +0+0 -background transparent bengali-blwm.png


================================================
FILE: images/bengali/bengali-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-bengali.md](../../opentype-shaping-bengali.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/truetype/gentiumplus/GentiumPlus-Regular.ttf --unicodes=2192

cluster_styles = None

> Note: always use `--features=-init` in examples where the `init`
> feature itself is not being explained.


## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-matra-decompose-before.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-matra-decompose-after.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09c7,200c,25cc,09d7

svg_stack.py --direction=h bengali-matra-decompose-before.svg right-arrow.svg bengali-matra-decompose-after.svg > bengali-matra-decompose.svg

cluster_styles = [c0,dc,c0,arrow,c0,dc,dc,c1]


## 2.7 Post-base consonants

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-yaphala-before.svg --features=-init,-pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=25cc,09cd,09af

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-yaphala-after.svg --features=-init,+pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=25cc,09cd,09af

svg_stack.py --direction=h bengali-yaphala-before.svg right-arrow.svg bengali-yaphala-after.svg > bengali-yaphala.svg

cluster_styles = [dc,c0,c1,arrow,dc,c1]

#### Duplicates for other subsections

cp bengali-yaphala.svg bengali-yaphala-1.svg

cluster_styles = [dc,c0,c1,arrow,dc,c1]


## 3.2 `nukt`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-nukt-before.svg --features=-init,-nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09a1,25cc,09bc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-nukt-after.svg --features=-init,+nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09a1,09bc

svg_stack.py --direction=h bengali-nukt-before.svg right-arrow.svg bengali-nukt-after.svg > bengali-nukt.svg

cluster_styles = [c0,dc,c1,arrow,c0]


## 3.3 `akhn`

### KSsa

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-akhn-kssa-before.svg --features=-init,-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0995,25cc,09cd,09b7

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-akhn-kssa-after.svg --features=-init,+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0995,09cd,09b7

svg_stack.py --direction=h bengali-akhn-kssa-before.svg right-arrow.svg bengali-akhn-kssa-after.svg > bengali-akhn-kssa.svg

cluster_styles = [c0,dc,c1,c2,arrow,c0]


### JNya

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-akhn-jnya-before.svg --features=-init,-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099c,25cc,09cd,099e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-akhn-jnya-after.svg --features=-init,+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099c,09cd,099e

svg_stack.py --direction=h bengali-akhn-jnya-before.svg right-arrow.svg bengali-akhn-jnya-after.svg > bengali-akhn-jnya.svg

cluster_styles = [c0,dc,c1,c2,arrow,c0]


## 3.4 `rphf`

### Bengali

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-rphf-before.svg --features=-init,-rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09b0,25cc,09cd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-rphf-after.svg --features=-init,+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09b0,09cd,25cc

svg_stack.py --direction=h bengali-rphf-before.svg right-arrow.svg bengali-rphf-after.svg > bengali-rphf.svg

cluster_styles = [c0,dc,c1,arrow,dc,c0]


### Assamese

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-rphf-as-before.svg --features=-init,-rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09f0,25cc,09cd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-rphf-as-after.svg --features=-init,+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09f0,09cd,25cc

svg_stack.py --direction=h bengali-rphf-as-before.svg right-arrow.svg bengali-rphf-as-after.svg > bengali-rphf-as.svg

cluster_styles = [c0,dc,c1,arrow,dc,c0]


## 3.7 `blwf`

### Raphala

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-raphala-before.svg --features=-init,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=25cc,09cd,09b0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-raphala-after.svg --features=-init,+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=25cc,09cd,09b0

svg_stack.py --direction=h bengali-raphala-before.svg right-arrow.svg bengali-raphala-after.svg > bengali-raphala.svg

cluster_styles = [dc,c0,c1,arrow,dc,c0]

#### Duplicates for other subsections

cp bengali-raphala.svg bengali-raphala-1.svg

cluster_styles = [dc,c0,c1,arrow,dc,c0]

cp bengali-raphala.svg bengali-raphala-2.svg

cluster_styles = [dc,c0,c1,arrow,dc,c0]

### Baphala

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-baphala-before.svg --features=-init,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=25cc,09cd,09ac

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-baphala-after.svg --features=-init,+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=25cc,09cd,09ac

svg_stack.py --direction=h bengali-baphala-before.svg right-arrow.svg bengali-baphala-after.svg > bengali-baphala.svg

cluster_styles = [dc,c0,c1,arrow,dc,c0]


## 3.9 `half`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-half-ka-before.svg --features=-init,-half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0995,25cc,09cd,0998

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-half-ka-after.svg --features=-init,+half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0995,09cd,0998

svg_stack.py --direction=h bengali-half-ka-before.svg right-arrow.svg bengali-half-ka-after.svg > bengali-half-ka.svg

cluster_styles = [c0,dc,c1,c2,arrow,c0,c2]


## 3.10 `pstf`

> Same as 2.7

## 3.11 `vatu`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-vatu-before.svg --features=-init,-vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0995,25cc,09cd,09b0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-vatu-after.svg --features=-init,+vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0995,09cd,09b0

svg_stack.py --direction=h bengali-vatu-before.svg right-arrow.svg bengali-vatu-after.svg > bengali-vatu.svg

cluster_styles = [c0,dc,c1,c2,arrow,c0]


## 3.12 `cjct`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-cjct-before.svg --features=-init,-cjct --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09aa,25cc,09cd,09a4,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-cjct-after.svg --features=-init,+cjct --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09aa,09cd,09a4,25cc

svg_stack.py --direction=h bengali-cjct-before.svg right-arrow.svg bengali-cjct-after.svg > bengali-cjct.svg

cluster_styles = [c0,dc,c1,c2,dc,arrow,c0,dc]


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-matra-position-before.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09c8,09b8,09cd,09ae,099a

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-matra-position-after.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09b8,09cd,09ae,099a,09c8

svg_stack.py --direction=h bengali-matra-position-before.svg right-arrow.svg bengali-matra-position-after.svg > bengali-matra-position.svg

cluster_styles = [c0,dc,c1,c2,arrow,c1,c0,c2]


## 4.3 Reph position

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-reph-position-before.svg --features=-init,+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09f0,09cd,25cc,09a1,09cd,09a1,09cd,0996,09c1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-reph-position-after.svg --features=-init,+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09f0,09cd,09a1,09cd,09a1,09cd,0996,09c1

svg_stack.py --direction=h bengali-reph-position-before.svg right-arrow.svg bengali-reph-position-after.svg > bengali-reph-position.svg

cluster_styles = [c0,c1,dc,c2,c3,c4,arrow,c0,c1,c2,c3]


## 5 `init`

> ?? Maybe there's a headline-using second character that would be
> better here....

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-init-before.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0999,09c7

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-init-after.svg --features=+init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0999,09c7

svg_stack.py --direction=h bengali-init-before.svg right-arrow.svg bengali-init-after.svg > bengali-init.svg

cluster_styles = [c0,c1,arrow,c0,c1]


## 5 `pres`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-pres-before.svg --features=-init,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099f,09bf

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-pres-after.svg --features=-init,+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099f,09bf

svg_stack.py --direction=h bengali-pres-before.svg right-arrow.svg bengali-pres-after.svg > bengali-pres.svg

cluster_styles = [c0,c1,arrow,c0]


## 5 `abvs`

> Note that Noto Bengali implements this feature in a pres lookup for
> unknown reasons.

> No more!

hb-view --font-size=110 --margin=2,25,2,16 --output-file=bengali-abvs-before.svg --features=-init,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09b0,09cd,25cc,09c0,0981

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-abvs-after.svg --features=-init,+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09b0,09cd,25cc,09c0,0981

svg_stack.py --direction=h bengali-abvs-before.svg right-arrow.svg bengali-abvs-after.svg > bengali-abvs.svg

cluster_styles = [c0,c1,dc,c2,c3,arrow,c0,c1,dc,c2,c3]


## 5 `blws`

> Note that Noto Bengali implements this feature in a pres lookup for
> unknown reasons.

> This now seems to require disabling -cjct and -blws, but -pres is no
> longer involved.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-blws-before.svg --features=-init,-blws,-cjct --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099d,09cd,09ac

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-blws-after.svg --features=-init,+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099d,09cd,09ac

svg_stack.py --direction=h bengali-blws-before.svg right-arrow.svg bengali-blws-after.svg > bengali-blws.svg

cluster_styles = [c0,c1,arrow,c0]


## 5 `psts`

> Note that Noto Bengali implements this feature in a pres lookup for
> unknown reasons.

> No more!

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-psts-before.svg --features=-init,-psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09a0,09c0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-psts-after.svg --features=-init,+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09a0,09c0

svg_stack.py --direction=h bengali-psts-before.svg right-arrow.svg bengali-psts-after.svg > bengali-psts.svg

cluster_styles = [c0,c1,arrow,c0,c1]


## 5 `haln`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-haln-before.svg --features=-init,-haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099b,09bc,09cd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-haln-after.svg --features=-init,+haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=099b,09bc,09cd

svg_stack.py --direction=h bengali-haln-before.svg right-arrow.svg bengali-haln-after.svg > bengali-haln.svg

cluster_styles = [c0,c1,c2,arrow,c0,c1,c2]


## 6 `abvm`

> ????

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-abvm-before.svg --features=-init,-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0994,0981

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-abvm-after.svg --features=-init,+abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=0994,0981

svg_stack.py --direction=h bengali-abvm-before.svg right-arrow.svg bengali-abvm-after.svg > bengali-abvm.svg

cluster_styles = [c0,c1,arrow,c0,c1]


## 6 `blwm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-blwm-after.svg --features=-init,+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09ad,09c2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=bengali-blwm-before.svg --features=-init,-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifBengali-Regular.ttf --unicodes=09ad,09c2

svg_stack.py --direction=h bengali-blwm-before.svg right-arrow.svg bengali-blwm-after.svg > bengali-blwm.svg

cluster_styles = [c0,c1,arrow,c0,c1]


================================================
FILE: images/devanagari/devanagari-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-devanagari.md](../../opentype-shaping-devanagari.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


> Note: always use `--features=-init` in examples where the `init`
> feature itself is not being explained.


## 3.1 `locl`

> Note: Noto Devanagari has a 'MAR' locl feature. 


## 3.2 `nukt`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-nukt-before.png --features=-init,-nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=092b,25cc,093c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-nukt-after.png --features=-init,+nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=092b,093c

montage devanagari-nukt-before.png right-arrow.png devanagari-nukt-after.png -geometry +0+0 -background transparent devanagari-nukt.png


## 3.3 `akhn`

### KSsa

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-akhn-kssa-before.png --features=-init,-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0915,25cc,094d,0937

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-akhn-kssa-after.png --features=-init,+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0915,094d,0937

montage devanagari-akhn-kssa-before.png right-arrow.png devanagari-akhn-kssa-after.png -geometry +0+0 -background transparent devanagari-akhn-kssa.png

### JNya

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-akhn-jnya-before.png --features=-init,-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=091c,25cc,094d,091e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-akhn-jnya-after.png --features=-init,+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=091c,094d,091e

montage devanagari-akhn-jnya-before.png right-arrow.png devanagari-akhn-jnya-after.png -geometry +0+0 -background transparent devanagari-akhn-jnya.png


## 3.4 `rphf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-rphf-before.png --features=-init,-rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0930,25cc,094d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-rphf-after.png --features=-init,+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0930,094d,25cc

montage devanagari-rphf-before.png right-arrow.png devanagari-rphf-after.png -geometry +0+0 -background transparent devanagari-rphf.png


## 3.5 `rkrf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-rkrf-before.png --features=-init,-rkrf,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=091d,25cc,094d,0930

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-rkrf-after.png --features=-init,+rkrf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=091d,094d,0930

montage devanagari-rkrf-before.png right-arrow.png devanagari-rkrf-after.png -geometry +0+0 -background transparent devanagari-rkrf.png


## 3.7 `blwf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-blwf-before.png --features=-init,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=25cc,094d,0930

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-blwf-after.png --features=-init,+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=25cc,094d,0930

montage devanagari-blwf-before.png right-arrow.png devanagari-blwf-after.png -geometry +0+0 -background transparent devanagari-blwf.png


## 3.9 `half`

### Half form

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-half-before.png --features=-init,-half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0932,094d,0930,25cc,094d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-half-after.png --features=-init,+half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0932,094d,0930,094d

montage devanagari-half-before.png right-arrow.png devanagari-half-after.png -geometry +0+0 -background transparent devanagari-half.png

### Eyelash Ra

> Note that Noto Devanagari eyelash-Ra substitution does not appear to
> work when using `U+25cc` dotted circle as the "base consonant"
> substitute. Hence, a real consonant glyph is used instead. But it is
> important that "Ra" _not_ be used as the "base consonant", as this
> triggers "Rakaar".

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-eyelash-ra-before.png --features=-init,-half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0931,094d,0932

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-eyelash-ra-after.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0931,094d,0932

montage devanagari-eyelash-ra-before.png right-arrow.png devanagari-eyelash-ra-after.png -geometry +0+0 -background transparent devanagari-eyelash-ra.png


## 3.11 `vatu`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-vatu-before.png --features=-init,-vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0936,25cc,094d,0930

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-vatu-after.png --features=-init,+vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0936,094d,0930

montage devanagari-vatu-before.png right-arrow.png devanagari-vatu-after.png -geometry +0+0 -background transparent devanagari-vatu.png


## 3.12 `cjct`

> Note: Noto Serif Devanagari implements this as `pres` for unknown
> reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-cjct-before.png --features=-init,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0922,25cc,094d,0922

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-cjct-after.png --features=-init,+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0922,094d,0922

montage devanagari-cjct-before.png right-arrow.png devanagari-cjct-after.png -geometry +0+0 -background transparent devanagari-cjct.png


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-matra-position-before.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=093f,091e,094d,200c,091e,094d,0939,094d,0930

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-matra-position-after.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=091e,094d,200c,091e,094d,0939,094d,0930,093f

montage devanagari-matra-position-before.png right-arrow.png devanagari-matra-position-after.png -geometry +0+0 -background transparent devanagari-matra-position.png


## 4.3 Reph position

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-reph-position-before.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0930,094d,25cc,092f,094d,0932,094d,092e,094d,0930

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-reph-position-after.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0930,094d,092f,094d,0932,094d,092e,094d,0930

montage devanagari-reph-position-before.png right-arrow.png devanagari-reph-position-after.png -geometry +0+0 -background transparent devanagari-reph-position.png


## 5 `init`

> Note: Noto Devanagari and Murty don't implement `init`.


## 5 `pres`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-pres-before.png --features=-init,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0916,093f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-pres-after.png --features=-init,+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0916,093f

montage devanagari-pres-before.png right-arrow.png devanagari-pres-after.png -geometry +0+0 -background transparent devanagari-pres.png


## 5 `abvs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-abvs-before.png --features=-init,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0930,094d,25cc,0949

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-abvs-after.png --features=-init,+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0930,094d,25cc,0949

montage devanagari-abvs-before.png right-arrow.png devanagari-abvs-after.png -geometry +0+0 -background transparent devanagari-abvs.png


## 5 `blws`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-blws-before.png --features=-init,-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0939,0944

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-blws-after.png --features=-init,+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0939,0944

montage devanagari-blws-before.png right-arrow.png devanagari-blws-after.png -geometry +0+0 -background transparent devanagari-blws.png


## 5 `psts`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-psts-before.png --features=-init,-psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=092b,093c,0940

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-psts-after.png --features=-init,+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=092b,093c,0940

montage devanagari-psts-before.png right-arrow.png devanagari-psts-after.png -geometry +0+0 -background transparent devanagari-psts.png


## 5 `haln`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-haln-before.png --features=-init,-haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=25cc,095c,094d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-haln-after.png --features=-init,+haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=25cc,095c,094d

montage devanagari-haln-before.png right-arrow.png devanagari-haln-after.png -geometry +0+0 -background transparent devanagari-haln.png


## 6 `abvm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-abvm-before.png --features=-init,-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=092b,0948

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-abvm-after.png --features=-init,+abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=092b,0948

montage devanagari-abvm-before.png right-arrow.png devanagari-abvm-after.png -geometry +0+0 -background transparent devanagari-abvm.png


## 6 `blwm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-blwm-before.png --features=-init,-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0915,0943

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-blwm-after.png --features=-init,+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0915,0943

montage devanagari-blwm-before.png right-arrow.png devanagari-blwm-after.png -geometry +0+0 -background transparent devanagari-blwm.png


================================================
FILE: images/devanagari/devanagari-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-devanagari.md](../../opentype-shaping-devanagari.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


> Note: always use `--features=-init` in examples where the `init`
> feature itself is not being explained.


## 3.1 `locl`

> Note: Noto Devanagari has 'NEP' and 'MAR' locl features. 


## 3.2 `nukt`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-nukt-before.svg --features=-init,-nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=092b,25cc,093c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-nukt-after.svg --features=-init,+nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=092b,093c

svg_stack --direction=h devanagari-nukt-before.svg right-arrow.svg devanagari-nukt-after.svg > devanagari-nukt.svg


## 3.3 `akhn`

### KSsa

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-akhn-kssa-before.svg --features=-init,-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0915,25cc,094d,0937

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-akhn-kssa-after.svg --features=-init,+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0915,094d,0937

svg_stack --direction=h devanagari-akhn-kssa-before.svg right-arrow.svg devanagari-akhn-kssa-after.svg > devanagari-akhn-kssa.svg

### JNya

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-akhn-jnya-before.svg --features=-init,-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=091c,25cc,094d,091e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-akhn-jnya-after.svg --features=-init,+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=091c,094d,091e

svg_stack --direction=h devanagari-akhn-jnya-before.svg right-arrow.svg devanagari-akhn-jnya-after.svg > devanagari-akhn-jnya.svg


## 3.4 `rphf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-rphf-before.svg --features=-init,-rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0930,25cc,094d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-rphf-after.svg --features=-init,+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0930,094d,25cc

svg_stack --direction=h devanagari-rphf-before.svg right-arrow.svg devanagari-rphf-after.svg > devanagari-rphf.svg


## 3.5 `rkrf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-rkrf-before.svg --features=-init,-rkrf,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=091d,25cc,094d,0930

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-rkrf-after.svg --features=-init,+rkrf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=091d,094d,0930

svg_stack --direction=h devanagari-rkrf-before.svg right-arrow.svg devanagari-rkrf-after.svg > devanagari-rkrf.svg


## 3.7 `blwf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-blwf-before.svg --features=-init,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=25cc,094d,0930

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-blwf-after.svg --features=-init,+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=25cc,094d,0930

svg_stack --direction=h devanagari-blwf-before.svg right-arrow.svg devanagari-blwf-after.svg > devanagari-blwf.svg


## 3.9 `half`

### Half form

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-half-before.svg --features=-init,-half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0932,094d,0930,25cc,094d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-half-after.svg --features=-init,+half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0932,094d,0930,094d

svg_stack --direction=h devanagari-half-before.svg right-arrow.svg devanagari-half-after.svg > devanagari-half.svg

### Eyelash Ra

> Note that Noto Devanagari eyelash-Ra substitution does not appear to
> work when using `U+25cc` dotted circle as the "base consonant"
> substitute. Hence, a real consonant glyph is used instead. But it is
> important that "Ra" _not_ be used as the "base consonant", as this
> triggers "Rakaar".

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-eyelash-ra-before.svg --features=-init,-half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0931,094d,0932

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-eyelash-ra-after.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0931,094d,0932

svg_stack --direction=h devanagari-eyelash-ra-before.svg right-arrow.svg devanagari-eyelash-ra-after.svg > devanagari-eyelash-ra.svg


## 3.11 `vatu`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-vatu-before.svg --features=-init,-vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0936,25cc,094d,0930

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-vatu-after.svg --features=-init,+vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0936,094d,0930

svg_stack --direction=h devanagari-vatu-before.svg right-arrow.svg devanagari-vatu-after.svg > devanagari-vatu.svg


## 3.12 `cjct`

> Note: Noto Serif Devanagari implements this as `pres` for unknown
> reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-cjct-before.svg --features=-init,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0922,25cc,094d,0922

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-cjct-after.svg --features=-init,+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0922,094d,0922

svg_stack --direction=h devanagari-cjct-before.svg right-arrow.svg devanagari-cjct-after.svg > devanagari-cjct.svg


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-matra-position-before.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=093f,091e,094d,200c,091e,094d,0939,094d,0930

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-matra-position-after.svg --features=-init,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=091e,094d,200c,091e,094d,0939,094d,0930,093f

svg_stack --direction=h devanagari-matra-position-before.svg right-arrow.svg devanagari-matra-position-after.svg > devanagari-matra-position.svg


## 4.3 Reph position

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-reph-position-before.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0930,094d,25cc,092f,094d,0932,094d,092e,094d,0930

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-reph-position-after.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0930,094d,092f,094d,0932,094d,092e,094d,0930

svg_stack --direction=h devanagari-reph-position-before.svg right-arrow.svg devanagari-reph-position-after.svg > devanagari-reph-position.svg


## 5 `init`

> Note: Noto Devanagari and Murty don't implement `init`.


## 5 `pres`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-pres-before.svg --features=-init,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0916,093f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-pres-after.svg --features=-init,+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0916,093f

svg_stack --direction=h devanagari-pres-before.svg right-arrow.svg devanagari-pres-after.svg > devanagari-pres.svg


## 5 `abvs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-abvs-before.svg --features=-init,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0930,094d,25cc,0949

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-abvs-after.svg --features=-init,+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0930,094d,25cc,0949

svg_stack --direction=h devanagari-abvs-before.svg right-arrow.svg devanagari-abvs-after.svg > devanagari-abvs.svg


## 5 `blws`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-blws-before.svg --features=-init,-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0939,0944

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-blws-after.svg --features=-init,+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0939,0944

svg_stack --direction=h devanagari-blws-before.svg right-arrow.svg devanagari-blws-after.svg > devanagari-blws.svg


## 5 `psts`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-psts-before.svg --features=-init,-psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=092b,093c,0940

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-psts-after.svg --features=-init,+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=092b,093c,0940

svg_stack --direction=h devanagari-psts-before.svg right-arrow.svg devanagari-psts-after.svg > devanagari-psts.svg


## 5 `haln`

> Note: look at 0926,093c,094d in serif???

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-haln-before.svg --features=-init,-haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf --unicodes=25cc,095d,094d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-haln-after.svg --features=-init,+haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf --unicodes=25cc,095d,094d

svg_stack --direction=h devanagari-haln-before.svg right-arrow.svg devanagari-haln-after.svg > devanagari-haln.svg


## 6 `abvm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-abvm-before.svg --features=-init,-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=092b,0948

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-abvm-after.svg --features=-init,+abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=092b,0948

svg_stack --direction=h devanagari-abvm-before.svg right-arrow.svg devanagari-abvm-after.svg > devanagari-abvm.svg


## 6 `blwm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-blwm-before.svg --features=-init,-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0915,0943

hb-view --font-size=110 --margin=2,16,2,16 --output-file=devanagari-blwm-after.svg --features=-init,+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifDevanagari-Regular.ttf --unicodes=0915,0943

svg_stack --direction=h devanagari-blwm-before.svg right-arrow.svg devanagari-blwm-after.svg > devanagari-blwm.svg


================================================
FILE: images/emoji/emoji-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-emoji.md](../../opentype-shaping-emoji.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/truetype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


## Invisibles general

> Two options for VS15 and VS16 are included.
>
> Adobe Source Emoji includes non-color glyphs for both codepoints that
> offer a degree of visual communication to prevent confusion in the
> sequence illustrations, but, unlike Gentium Plus's glyphs, they are
> not immediately associated with the codepoint itself.

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --output-file=text-pres-selector.png --font-funcs=ot --background=FFFFFF00 ./AdobeSourceEmoji/SourceEmoji-BnW.otf --unicodes=fe0e

hb-view --font-size=110 --margin=2,16,2,16 --features="ss06" --shapers=ot --preserve-default-ignorables --output-file=vs15.png --background=FFFFFF00 /usr/share/fonts/truetype/gentiumplus/GentiumPlus-R.ttf --unicodes=fe0e

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --output-file=emoji-pres-selector.png --font-funcs=ot --background=FFFFFF00 ./AdobeSourceEmoji/SourceEmoji-BnW.otf --unicodes=fe0f

hb-view --font-size=110 --margin=2,16,2,16 --features="ss06" --shapers=ot --preserve-default-ignorables --output-file=vs16.png --background=FFFFFF00 /usr/share/fonts/truetype/gentiumplus/GentiumPlus-R.ttf --unicodes=fe0f


> Gentium Plus's ZWJ and ZWNJ are visually preferable:
hb-view --font-size=110 --margin=2,16,2,16 --features="ss06" --shapers=ot --preserve-default-ignorables --output-file=zwj.png --background=FFFFFF00 /usr/share/fonts/truetype/gentiumplus/GentiumPlus-R.ttf --unicodes=200d

hb-view --font-size=110 --margin=2,16,2,16 --features="ss06" --shapers=ot --preserve-default-ignorables --output-file=zwnj.png --background=FFFFFF00 /usr/share/fonts/truetype/gentiumplus/GentiumPlus-R.ttf --unicodes=200c


## Human beings general

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=fallback-boy.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f466

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=fallback-girl.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f467

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=fallback-man.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f468

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=fallback-woman.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f469

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=fallback-generalperson.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f9d1


## Presentation sequences

> Codepoints used are based on defaults in https://www.unicode.org/emoji/charts-14.0/text-style.html

> Invisibles:
montage vs15.png text-pres-selector.png -geometry +0+0 -background transparent -tile 2x1 text-presentation.png

montage vs16.png emoji-pres-selector.png -geometry +0+0 -background transparent -tile 2x1 emoji-presentation.png


> Default text-presentation: U+26A0, warning sign:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=default-text-before.png --background=FFFFFF00 ./Noto\ Emoji\ BW-via-GoogleFonts/static/NotoEmoji-Regular.ttf --unicodes=26a0

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=default-text-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=26a0

montage default-text-before.png emoji-pres-selector.png right-arrow.png default-text-after.png -geometry +0+0 -background transparent -tile 4x1 emoji-pres-sequence.png


> Default emoji-presentation: U+231B, hourglass:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=default-emoji-before.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=231b

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=default-emoji-after.png --background=FFFFFF00 ./Noto\ Emoji\ BW-via-GoogleFonts/static/NotoEmoji-Regular.ttf --unicodes=231b

montage default-emoji-before.png text-pres-selector.png right-arrow.png default-emoji-after.png -geometry +0+0 -background transparent -tile 4x1 text-pres-sequence.png


## Modifier sequences

> Adobe Source Emoji for invisibles:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --output-file=skintone-2.png --background=FFFFFF00 ./AdobeSourceEmoji/SourceEmoji-BnW.otf --unicodes=1f3fb

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --output-file=skintone-3.png --background=FFFFFF00 ./AdobeSourceEmoji/SourceEmoji-BnW.otf --unicodes=1f3fc

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --output-file=skintone-4.png --background=FFFFFF00 ./AdobeSourceEmoji/SourceEmoji-BnW.otf --unicodes=1f3fd

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --output-file=skintone-5.png --background=FFFFFF00 ./AdobeSourceEmoji/SourceEmoji-BnW.otf --unicodes=1f3fe

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --output-file=skintone-6.png --background=FFFFFF00 ./AdobeSourceEmoji/SourceEmoji-BnW.otf --unicodes=1f3ff


> NotoColorEmoji squares for fallback skin-tone-squares:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=skintone-fallback-2.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f3fb

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=skintone-fallback-3.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f3fc

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=skintone-fallback-4.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f3fd

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=skintone-fallback-5.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f3fe

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=skintone-fallback-6.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f3ff


> Symbola for text-mode fallback squares:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --output-file=skintone-2-text-fallback.png --background=FFFFFF00 /usr/share/fonts/truetype/ancient-scripts/Symbola_hint.ttf --unicodes=1f3fb

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --output-file=skintone-3-text-fallback.png --background=FFFFFF00 /usr/share/fonts/truetype/ancient-scripts/Symbola_hint.ttf --unicodes=1f3fc

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --output-file=skintone-4-text-fallback.png --background=FFFFFF00 /usr/share/fonts/truetype/ancient-scripts/Symbola_hint.ttf --unicodes=1f3fd

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --output-file=skintone-5-text-fallback.png --background=FFFFFF00 /usr/share/fonts/truetype/ancient-scripts/Symbola_hint.ttf --unicodes=1f3fe

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --output-file=skintone-6-text-fallback.png --background=FFFFFF00 /usr/share/fonts/truetype/ancient-scripts/Symbola_hint.ttf --unicodes=1f3ff


> Fitzpatrick scale:
montage skintone-2.png skintone-fallback-2.png -geometry +0+0 -background transparent -tile 2x1 fitzpatrick-2.png

montage skintone-3.png skintone-fallback-3.png -geometry +0+0 -background transparent -tile 2x1 fitzpatrick-3.png

montage skintone-4.png skintone-fallback-4.png -geometry +0+0 -background transparent -tile 2x1 fitzpatrick-4.png

montage skintone-5.png skintone-fallback-5.png -geometry +0+0 -background transparent -tile 2x1 fitzpatrick-5.png

montage skintone-6.png skintone-fallback-6.png -geometry +0+0 -background transparent -tile 2x1 fitzpatrick-6.png


> Fitzpatrick scale text fallback:
> (currently unused)
montage skintone-2.png skintone-2-text-fallback.png -geometry +0+0 -background transparent -tile 2x1 fitzpatrick-2-text-fallback.png

montage skintone-3.png skintone-3-text-fallback.png -geometry +0+0 -background transparent -tile 2x1 fitzpatrick-3-text-fallback.png

montage skintone-4.png skintone-4-text-fallback.png -geometry +0+0 -background transparent -tile 2x1 fitzpatrick-4-text-fallback.png

montage skintone-5.png skintone-5-text-fallback.png -geometry +0+0 -background transparent -tile 2x1 fitzpatrick-5-text-fallback.png

montage skintone-6.png skintone-6-text-fallback.png -geometry +0+0 -background transparent -tile 2x1 fitzpatrick-6-text-fallback.png


> Sequence:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=modifier-before.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f44b

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=modifier-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f44b,1f3fd

montage modifier-before.png skintone-4.png right-arrow.png modifier-after.png -geometry +0+0 -background transparent -tile 4x1 modifier-sequence.png

montage modifier-before.png skintone-4.png right-arrow.png modifier-before.png skintone-fallback-4.png -geometry +0+0 -background transparent -tile 5x1 modifier-sequence-fallback.png


## Regional Indicator flag sequences

> UN chosen for maximum achievable internationality

> Sequence
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --output-file=regional-flag-un-before.png --background=FFFFFF00 ./AdobeSourceEmoji/SourceEmoji-BnW.otf --unicodes=1f1fa,1f1f3

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=regional-flag-un-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f1fa,1f1f3

montage regional-flag-un-before.png right-arrow.png regional-flag-un-after.png -geometry +0+0 -background transparent regional-indicator-flag-sequence-un.png

> Fallbacks

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=regional-flag-un-fallback.png --background=FFFFFF00 ./Noto\ Emoji\ BW-via-GoogleFonts/static/NotoEmoji-Regular.ttf --unicodes=1f1fa,1f1f3

montage regional-flag-un-before.png right-arrow.png regional-flag-un-fallback.png -geometry +0+0 -background transparent regional-indicator-flag-sequence-un-fallback.png


## Tag flag sequences

> From https://unicode.org/reports/tr51/#valid-emoji-tag-sequences
>
> Wales chosen to be most distinctive example KNOWN to be widely implemented


> Tag pseudo-glyphs

> LastResort. This crops in, non-precisely, on the "tag" symbol itself:
hb-view --font-size=110 --margin=-42,-28,-44,-28 --preserve-default-ignorables --output-file=tag-isolate.png --background=FFFFFF00 ./unicode/Last-Resort/LastResort-Regular.ttf --unicodes=e007f

> Margin-adjusted versions:
hb-view --font-size=110 --margin=-20,-28,-48,-28 --preserve-default-ignorables --output-file=tag-isolate-high.png --background=FFFFFF00 ./unicode/Last-Resort/LastResort-Regular.ttf --unicodes=e007f

hb-view --font-size=110 --margin=-44,-28,-24,-28 --preserve-default-ignorables --output-file=tag-isolate-low.png --background=FFFFFF00 ./unicode/Last-Resort/LastResort-Regular.ttf --unicodes=e007f


> DejaVu empty dotted square, U+2B1A:
hb-view --font-size=110 --margin=-16,2,2,2 --preserve-default-ignorables --output-file=dotted-square.png --background=FFFFFF00 /usr/share/fonts/truetype/dejavu/DejaVuSans.ttf --unicodes=2b1a


> Letters and "end" components:
hb-view --font-size=40 --margin=16,2,2,2 --preserve-default-ignorables --output-file=g-isolate.png --background=FFFFFF00 /usr/share/fonts/truetype/dejavu/DejaVuSans.ttf --unicodes=0067

hb-view --font-size=40 --margin=16,2,2,2 --preserve-default-ignorables --output-file=b-isolate.png --background=FFFFFF00 /usr/share/fonts/truetype/dejavu/DejaVuSans.ttf --unicodes=0062

hb-view --font-size=40 --margin=16,2,2,2 --preserve-default-ignorables --output-file=w-isolate.png --background=FFFFFF00 /usr/share/fonts/truetype/dejavu/DejaVuSans.ttf --unicodes=0077

hb-view --font-size=40 --margin=16,2,2,2 --preserve-default-ignorables --output-file=l-isolate.png --background=FFFFFF00 /usr/share/fonts/truetype/dejavu/DejaVuSans.ttf --unicodes=006c

hb-view --font-size=40 --margin=16,2,2,2 --preserve-default-ignorables --output-file=s-isolate.png --background=FFFFFF00 /usr/share/fonts/truetype/dejavu/DejaVuSans.ttf --unicodes=0073

hb-view --font-size=32 --margin=2,2,16,2 --preserve-default-ignorables --output-file=end-isolate.png --background=FFFFFF00 /usr/share/fonts/truetype/dejavu/DejaVuSans.ttf --unicodes=0045,004e,0044


> Composite tags:

composite -gravity north tag-isolate-high.png dotted-square.png blank-tag-high.png

composite -gravity south tag-isolate-low.png dotted-square.png blank-tag-low.png

composite -gravity north g-isolate.png blank-tag-low.png tag-g.png

composite -gravity north b-isolate.png blank-tag-low.png tag-b.png

composite -gravity north w-isolate.png blank-tag-low.png tag-w.png

composite -gravity north l-isolate.png blank-tag-low.png tag-l.png

composite -gravity north s-isolate.png blank-tag-low.png tag-s.png

composite -gravity south end-isolate.png blank-tag-high.png tag-end.png


> Completed sequence:

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=tag-flag-black.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f3f4

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=tag-flag-wales-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f3f4,e0067,e0062,e0077,e006c,e0073,e007f

montage tag-flag-black.png tag-g.png tag-b.png tag-w.png tag-l.png tag-s.png tag-end.png -geometry +0+0 -background transparent -tile 7x1 tag-flag-wales-before.png

montage tag-flag-wales-before.png right-arrow.png tag-flag-wales-after.png -geometry +0+0 -background transparent tag-flag-sequence-wales.png


## Keycap sequences

> Noto Sans Symbols has a visual CEK:
hb-view --font-size=110 --margin=2,64,2,64 --preserve-default-ignorables --output-file=keycap-cek.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSymbols-Regular.ttf --unicodes=20e3


> Sequence:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=keycap-before.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerif-Regular.ttf --unicodes=0034

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=keycap-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=0034,20e3

> Something off here; the -gravity and -geometry switches are *not* working as expected....
montage keycap-before.png keycap-cek.png right-arrow.png keycap-after.png -geometry +0+0 -background transparent -tile 4x1 keycap-sequence.png


## ZWJ sequences

### ZWJ hair sequences

> Hairstyles:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=hair-red.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f9b0

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=hair-curly.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f9b1

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=hair-bald.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f9b2

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=hair-white.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f9b3


> Sequence:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=woman-white-hair.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1F469,200d,1f9b3

montage fallback-woman.png zwj.png hair-white.png right-arrow.png woman-white-hair.png -geometry +0+0 -background transparent -tile 5x1 hairstyle-sequence.png


### ZWJ gendered person sequences

> Gender signs, input (text-presentation style):
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=gendersign-female.png --background=FFFFFF00 ./AdobeSourceEmoji/SourceEmoji-BnW.otf --unicodes=2640

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=gendersign-male.png --background=FFFFFF00 ./AdobeSourceEmoji/SourceEmoji-BnW.otf --unicodes=2642

> Gender signs, output fallback (emoji-presentation style):
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=gendersign-female-fallback.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=2640

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=gendersign-male-fallback.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=2642


> Sequence:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=gendered-person-before.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1F3c4

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=gendered-person-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1F3c4,200d,2640,fe0f

montage gendered-person-before.png zwj.png gendersign-female.png emoji-pres-selector.png right-arrow.png gendered-person-after.png -geometry +0+0 -background transparent -tile 6x1 gendered-person-sequence.png


### ZWJ multi-person group sequences

>
> Couple with heart:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=heart.png --background=FFFFFF00 ./AdobeSourceEmoji/SourceEmoji-BnW.otf --unicodes=2764

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=multi-person-man-heart-man-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f468,200d,2764,fe0f,200d,1f468

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=multi-person-man-skintone-2-heart-man-skintone-2-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f468,1f3fb,200d,2764,fe0f,200d,1f468,1f3fb

> Sequence:
montage fallback-man.png zwj.png heart.png emoji-pres-selector.png zwj.png fallback-man.png right-arrow.png multi-person-man-heart-man-after.png -geometry +0+0 -background transparent -tile 8x1 multi-person-heart-sequence.png

montage fallback-man.png skintone-2.png zwj.png heart.png emoji-pres-selector.png zwj.png fallback-man.png skintone-2.png right-arrow.png multi-person-man-skintone-2-heart-man-skintone-2-after.png -geometry +0+0 -background transparent -tile 10x1 multi-person-heart-skintone-sequence.png


>
> Couple kiss:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=kiss-mark.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f48b

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=multi-person-kiss-sequence-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f469,200d,2764,fe0f,200d,1f48b,200d,1f468

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=multi-person-kiss-sequence-skintone-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f469,1f3ff,200d,2764,fe0f,200d,1f48b,200d,1f468,1f3fd

> Sequence:
montage fallback-woman.png zwj.png heart.png emoji-pres-selector.png zwj.png kiss-mark.png zwj.png fallback-man.png right-arrow.png multi-person-kiss-sequence-after.png -geometry +0+0 -background transparent -tile 10x1 multi-person-kiss-sequence.png

montage fallback-woman.png skintone-6.png zwj.png heart.png emoji-pres-selector.png zwj.png kiss-mark.png zwj.png fallback-man.png skintone-4.png right-arrow.png multi-person-kiss-sequence-skintone-after.png -geometry +0+0 -background transparent -tile 12x1 multi-person-kiss-skintone-sequence.png


>
> Couple holding hands:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=handshake.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f91d

> Noto Color Emoji does not support this sequence, perhaps because it expects
> fallback to `U+1F46D` to occur, e.g. at the keyboard/UI level....
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=multi-person-holding-hands-sequence-woman-woman-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f46d

> This modifier sequence IS supported in Noto Color Emoji
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=multi-person-holding-hands-sequence-woman-woman-skintone-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f469,1f3fc,200d,1f91d,200d,1f469,1f3fe

> Sequence:
montage fallback-woman.png zwj.png handshake.png zwj.png fallback-woman.png right-arrow.png multi-person-holding-hands-sequence-woman-woman-after.png -geometry +0+0 -background transparent -tile 7x1 multi-person-holding-hands-sequence.png

montage fallback-woman.png skintone-3.png zwj.png handshake.png zwj.png fallback-woman.png skintone-5.png right-arrow.png multi-person-holding-hands-sequence-woman-woman-skintone-after.png -geometry +0+0 -background transparent -tile 9x1 multi-person-holding-hands-skintone-sequence.png


>
> Family:
>
> Noto Color Emoji supports "Man,ZWJ,Woman,ZWJ,_child_" but not "Woman,ZWJ,Man,ZWJ,_child_".
>
> Noto Color Emoji supports "Woman,ZWJ,Woman,ZWJ,Girl,ZWJ,Boy" but not "Woman,ZWJ,Woman,ZWJ,Boy,ZWJ,Girl".
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=multi-person-family-man-boy-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f468,200d,1f466

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=multi-person-family-man-girl-girl-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f468,200d,1f467,200d,1f467

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=multi-person-family-man-woman-girl-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f468,200d,1f469,200d,1f467

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=multi-person-family-woman-woman-girl-boy-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f469,200d,1f469,200d,1f467,200d,1f466

> Sequence:
montage fallback-man.png zwj.png fallback-boy.png right-arrow.png multi-person-family-man-boy-after.png -geometry +0+0 -background transparent -tile 5x1 multi-person-family-man-boy-sequence.png

montage fallback-man.png zwj.png fallback-girl.png zwj.png fallback-girl.png right-arrow.png multi-person-family-man-girl-girl-after.png -geometry +0+0 -background transparent -tile 7x1 multi-person-family-man-girl-girl-sequence.png

montage fallback-man.png zwj.png fallback-woman.png zwj.png fallback-girl.png right-arrow.png multi-person-family-man-woman-girl-after.png -geometry +0+0 -background transparent -tile 7x1 multi-person-family-man-woman-girl-sequence.png

montage fallback-woman.png zwj.png fallback-woman.png zwj.png fallback-girl.png zwj.png fallback-boy.png right-arrow.png multi-person-family-woman-woman-girl-boy-after.png -geometry +0+0 -background transparent -tile 9x1 multi-person-family-woman-woman-girl-boy-sequence.png

>
> Noto Color Emoji does not seem to support skin-tone modifiers for family sequences,
> at least in the current release on my system.
>
>


>
> Shaking hands:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=hand-left.png --background=FFFFFF00 ./blobmoji/Blobmoji.ttf --unicodes=1faf1

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=hand-right.png --background=FFFFFF00 ./blobmoji/Blobmoji.ttf --unicodes=1faf2

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=multi-person-shaking-hands-after.png --background=FFFFFF00 ./blobmoji/Blobmoji.ttf --unicodes=1faf1,1f3fd,200d,1faf2,1f3ff

> Sequence:
montage hand-left.png skintone-4.png zwj.png hand-right.png skintone-6.png right-arrow.png multi-person-shaking-hands-after.png -geometry +0+0 -background transparent -tile 7x1 multi-person-shaking-hands-sequence.png


### ZWJ role sequences

> Firefighter (emoji-pres-by-default):
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=firetruck.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1F692

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=role-firefighter-man-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f468,200d,1F692

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=role-firefighter-man-skintone-6-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f468,1f3ff,200d,1F692

montage fallback-man.png zwj.png firetruck.png right-arrow.png role-firefighter-man-after.png -geometry +0+0 -background transparent -tile 5x1 role-sequence-firefighter.png

montage fallback-man.png skintone-6.png zwj.png firetruck.png right-arrow.png role-firefighter-man-skintone-6-after.png -geometry +0+0 -background transparent -tile 6x1 role-sequence-firefighter-skintone-6.png


> Pilot (non-emoji-pres-by-default)
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=airplane.png --background=FFFFFF00 ./Noto\ Emoji\ BW-via-GoogleFonts/static/NotoEmoji-Regular.ttf --unicodes=2708,fe0e

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=role-pilot-woman-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f469,200d,2708,fe0f

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=role-pilot-woman-skintone-2-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f469,1f3fb,200d,2708,fe0f

montage fallback-woman.png zwj.png airplane.png emoji-pres-selector.png right-arrow.png role-pilot-woman-after.png -geometry +0+0 -background transparent -tile 6x1 role-sequence-pilot.png

montage fallback-woman.png skintone-2.png zwj.png airplane.png emoji-pres-selector.png right-arrow.png role-pilot-woman-skintone-2-after.png -geometry +0+0 -background transparent -tile 7x1 role-sequence-pilot-skintone-2.png


### ZWJ color sequences

> Noto Color Emoji, "black cat" seems to be the only widely-implemented sequence:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=color-before.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1F408

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=color-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1F408,200d,2b1b

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=color-black.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=2b1b

montage color-before.png zwj.png color-black.png right-arrow.png color-after.png -geometry +0+0 -background transparent -tile 5x1 color-sequence.png


### ZWJ directionality sequences

> These are not widely implemented, and possibly not implemented at all in open-source fonts.
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=running.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f3c3

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=direction-rightward.png --background=FFFFFF00 ./Noto\ Emoji\ BW-via-GoogleFonts/static/NotoEmoji-Regular.ttf --unicodes=27a1

convert -flop running.png running-rightward.png

montage running.png zwj.png direction-rightward.png emoji-pres-selector.png right-arrow.png running-rightward.png -geometry +0+0 -background transparent -tile 6x1 zwj-directionality-sequence.png


### ZWJ additional sequences

> 13 on the named list. Heart-on-fire is clearly not just an overlay, and not a flag:
hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=fire.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=1f525

hb-view --font-size=110 --margin=2,16,2,16 --preserve-default-ignorables --font-funcs=ot --output-file=heart-on-fire-after.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoColorEmoji.ttf --unicodes=2764,fe0f,200d,1f525

montage heart.png emoji-pres-selector.png zwj.png fire.png right-arrow.png heart-on-fire-after.png -geometry +0+0 -background transparent -tile 6x1 zwj-sequence-heart-on-fire.png


## Other sequences and ligatures

> For non-standard ligatures (banana split?)

> Adobe SourceEmoji has a "hand + zwj + heart -> i-love-you-hand" ligature:


> Other possibilities:
> https://blog.emojipedia.org/emoji-flags-explained/ (discusses flag options for vendors that don't require Unicode pre-/formal-approval)
> Noto monochromatic emoji details: https://blog.emojipedia.org/exploring-googles-new-black-and-blobby-emoji-font/
> Are Noto's "Emoji Kitchen" emoji (beginning with magic wand) just ligatures, or are they all "stickers"? https://blog.emojipedia.org/emoji-kitchen-beta-magics-back-the-blobs/


================================================
FILE: images/example-fonts.txt
================================================
#############################################################
#                                                           #
# All uncommented lines in this file are URLs which should  #
# be downloadable via wget, cURL, or scripts.               #
#                                                           #
# SHA checksums are for the exact file downloaded at the    #
# URL (including archive files), not for the .ttf/.otf/.ttc #
# within.                                                   #
#                                                           #
# If any CDN links change or URLs break, please open an     #
# issue in this project's bug tracker, not the font's.      #
#                                                           #
#############################################################

##########################
# General
#

# GentiumPlus-R.ttf
# sha256: 5244209b44a5111736379686119cd54042dce18e308a351c366999ac563ca6bb
https://software.sil.org/downloads/r/gentium/GentiumPlus-6.101.zip


##########################
# Arabic-like
#

# Arabic

# NotoNaskhArabic-Regular.ttf
# sha256: c1f3654dd9142073b00289700ce0aa5218c1aa4d5be38a3c5f7f2649bee12c1f
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoNaskhArabic/unhinted/ttf/NotoNaskhArabic-Regular.ttf

# NotoNastaliqUrdu-Regular.ttf
# sha256: beee3156a724adf64178c2dbac86f5394b6a4bb67aea75d4354c817c9e6c27da
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoNastaliqUrdu/hinted/ttf/NotoNastaliqUrdu-Regular.ttf

# NotoKufiArabic-Regular.ttf
# sha256: efb00432829e570a12a5afd43dd4667950ee7cf4dbc9f4c421b2f27b89dee301
https://fonts.google.com/download?family=Noto%20Kufi%20Arabic


# Mongolian

# NotoSansMongolian-Regular.ttf
# sha256: 31a750c5b7e335ebb3d841b2f97baf045178db23fe199eb3040cb3496470daa7
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSansMongolian/unhinted/ttf/NotoSansMongolian-Regular.ttf

# N'Ko

# NotoSansNKo-Regular.ttf
# sha256: 4e9de46bfa60bf800fe4608c5ec434602c496b924b6e5333dc14515b0883dd86
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSansNKo/full/ttf/NotoSansNKo-Regular.ttf

# Syriac

# NotoSansSyriacEastern-Regular.ttf
# sha256: 3310d8ec31633d516b09a676b8fa6c784e2d263aeea52051ca40c60347e2e9cd
https://noto-website-2.storage.googleapis.com/pkgs/NotoSansSyriacEastern-unhinted.zip

# NotoSansSyriacWestern-Regular.ttf
# sha256: e8c6c6ae30ae6fb414e59112ebd08ab8341733b39b756a918e3c324249cdf5b5
https://noto-website-2.storage.googleapis.com/pkgs/NotoSansSyriacWestern-unhinted.zip

# NotoSansSyriacEstrangela-Regular.ttf
# sha256: 0cb823a1d55ca97bda55ae1f31b03ef762b1faa3447700142f83c9dc8f7828e4
https://noto-website-2.storage.googleapis.com/pkgs/NotoSansSyriacEstrangela-unhinted.zip


##########################
# Indic
#

# Bengali

# NotoSerifBengali-Regular.ttf
# sha256: 6f046ad71ff7f3ba154e7087a02a1f71fff782e85ab00fdd08cd28ad3e06e24b
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSerifBengali/full/ttf/NotoSerifBengali-Regular.ttf

# Devanagari

# NotoSerifDevanagari-Regular.ttf
# sha256: 55d2e062fac9208412a15e79a5bd3753af650a4ed47bb2eea4b105591fba4a8f
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSerifDevanagari/full/ttf/NotoSerifDevanagari-Regular.ttf

# Gujarati

# NotoSerifGujarati-Regular.ttf
# sha256: 041827ca7d1b58393587d5c974db1adf9409293ac13f23cef1a02b8700a4c6c3
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSerifGujarati/full/ttf/NotoSerifGujarati-Regular.ttf

# Gurmukhi

# NotoSerifGurmukhi-Regular.ttf
# sha256: 255f404f61622ef03385a2851c2423de3a676d978115b2e39ffd5570e0022c32
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSerifGurmukhi/full/ttf/NotoSerifGurmukhi-Regular.ttf

# Kannada

# NotoSerifKannada-Regular.ttf
# sha256: c9e7d3168a0134f68456607521b66e3208586d07cc144618791f3492a0e88bfb
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSerifKannada/full/ttf/NotoSerifKannada-Regular.ttf

# Malayalam

# NotoSerifMalayalam-Regular.ttf
# sha256: 7755029171b99ef7d5e7027c6f9148bcca3f3b95a41fd73cfc79d664a935cf30
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSerifMalayalam/full/ttf/NotoSerifMalayalam-Regular.ttf

# NotoSansMalayalam-Regular.ttf 
# sha256: d18b5c10d85bba3d3d89775484bcfa731112f501ac070793ddbda5a36992520f
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSansMalayalam/full/ttf/NotoSansMalayalam-Regular.ttf

# Rachana-Regular.ttf
# sha256: a0d5c8417b58f98fe387758bb5c1a0e75225bb77759640bd7131c6c78dce16f1
https://gitlab.com/rit-fonts/RIT-Rachana/-/jobs/artifacts/1.2/download?job=build-tag

# Oriya

# NotoSansOriya-Regular.ttf
# sha256: e87f6a6c611c53dabb708a7ddfafa527b3ca7d0ccc5d2e0e659f46af19cb320f
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSansOriya/full/ttf/NotoSansOriya-Regular.ttf

# Sinhala

# NotoSerifSinhala-Regular.ttf
# sha256: cafa8544ad87a1116d296193cfbf8be39b7927e67fd4af36b7e73105cf2ac85e
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSerifSinhala/full/ttf/NotoSerifSinhala-Regular.ttf

# Tamil

# NotoSerifTamil-Regular.ttf
# sha256: 00611c2dc5e6a09a5cd85eb87e135ae86aaafa5ac63543adf5eb2050784582a0
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSerifTamil/full/ttf/NotoSerifTamil-Regular.ttf

# NotoSansTamil-Regular.ttf
# sha256: 0afbc221964b6048c6d771c525be474d21b288a621dce0fafedd695cc5c98e4e
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSansTamil/full/ttf/NotoSansTamil-Regular.ttf

# Telugu

# NotoSerifTelugu-Regular.ttf
# sha256: 15cec4e867d25105c484301af0b4be577231e04e13364abdff844a4a5c9711dc
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSerifTelugu/full/ttf/NotoSerifTelugu-Regular.ttf

# NotoSansTelugu-Regular.ttf
# sha256: e0595bcf47b907b2afb77a34ae64c3e8351f56452c66983660172c6b9ea15576
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSansTelugu/full/ttf/NotoSansTelugu-Regular.ttf


##########################
# Brahmi-derived
#

# Khmer

# NotoSerifKhmer-Regular.ttf
# sha256: 1068ef26dadf6bd322f6bcef1990015ca3c998064d20842d67aaefc4b62d9cdb
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSerifKhmer/full/ttf/NotoSerifKhmer-Regular.ttf

# Myanmar

# NotoSansMyanmar-Regular.ttf
# sha256: e6d59055e5e7a8cc57ad8e04150136f4fc48bc3fca6f307c5e78f40f7e560a6d
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSansMyanmar/full/ttf/NotoSansMyanmar-Regular.ttf

# PadaukBook-Regular.ttf
# sha256: b47b2639489d7cec5ad38d025f181b061767e4e161a41f19528e910f79fd03a1
https://software.sil.org/downloads/r/padauk/padauk-3.003.zip

# Thai and Lao

# NotoSerifThai-Regular.ttf
# sha256: 428afb46af2c025ed2b9fe39bda2fffce9475fa5d2e7ae7911771633014b91b0
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSerifThai/full/ttf/NotoSerifThai-Regular.ttf

# NotoSerifLao-Regular.ttf
# sha256: ff5ab4f3270c448b99b113a8ac6275bc6e4a4ca922ed2271df9a95132b5c2db6
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSerifLao/full/ttf/NotoSerifLao-Regular.ttf

# Tibetan

# NotoSerifTibetan-Regular.ttf -- RENAMED FROM NotoSansTibetan-Regular.ttf
# sha256: 2ac2555a88b5bcbbacc490003e96dd4d00d064daeee4a9465d68cf301f9886b3
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSerifTibetan/full/ttf/NotoSerifTibetan-Regular.ttf


##########################
# Hangul
#

# NotoSansCJK-Regular.ttc
# sha256: b76b0433203017ca80401b2ee0dd69350349871c4b19d504c34dbdd80541690a
https://github.com/googlefonts/noto-cjk/archive/refs/tags/NotoSansV2.001.zip


##########################
# Hebrew
#

# NotoSansHebrew-Regular.ttf
# sha256: 6d925ace0a6ccce47b64e4a8d26869f423774d66dd6b9f67cf98441075e69582
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSansHebrew/full/ttf/NotoSansHebrew-Regular.ttf


##########################
# Emoji
#

# NotoColorEmoji.ttf
# sha256: ad8eb6a1c403c73f0ff48ce288f2101de3814d0c6483f398023630e98331eeb2
https://fonts.google.com/download?family=Noto%20Color%20Emoji

# NotoEmoji-Regular.ttf
# sha256: 415dc6290378574135b64c808dc640c1df7531973290c4970c51fdeb849cb0c5
https://github.com/googlefonts/noto-emoji/raw/v2020-04-08-unicode12_1/fonts/NotoEmoji-Regular.ttf

# Symbola_hint.ttf
# sha256: 856de5857be48b8e31fc078fb93a9f3bd706b2552ba57b42863622f837cd3f35
https://fontlibrary.org/assets/downloads/symbola/cf81aeb303c13ce765877d31571dc5c7/symbola.zip

# LastResort-Regular.ttf
# sha256: da83a62294e74d963a10de4c3750ccf089273e3b7fc6744daef9844163ade078
https://github.com/unicode-org/last-resort-font/releases/download/15.000/LastResort-Regular.ttf

# DejaVuSans.ttf
# sha256: 6aaad3365c30c4f8d2504e569527e588d33eeae66dd7045bcfeef7413820db2a
http://sourceforge.net/projects/dejavu/files/dejavu/2.37/dejavu-fonts-ttf-2.37.tar.bz2

# NotoSansSymbols-Regular.ttf
# sha256: 0088617baec0e8ac47e022cc1f38695f772301c9ef6d1f24a785abbef1e05d79
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSansSymbols/full/ttf/NotoSansSymbols-Regular.ttf

# NotoSerif-Regular.ttf
# sha256: 504b8ec55d003cade88fb0a7bb93254ad81fd1cb29f4818d260300dbaef5d37b
https://cdn.jsdelivr.net/gh/notofonts/notofonts.github.io/fonts/NotoSerif/hinted/ttf/NotoSerif-Regular.ttf

# Blobmoji.ttf
# sha256: c3db3bb85e84ea7a2674399e281e004dc181ff9038b13c03bf01d3dd8197cfc8
https://github.com/C1710/blobmoji/releases/download/v14.0.1/Blobmoji.ttf

# SourceEmoji-BnW.otf
# sha256: e648822387b8860a74a478df07e1bdc6e56ee57e74e3f4b07272a8a9485c7186
https://github.com/adobe-fonts/source-emoji/releases/download/1.017/SourceEmoji-BnW.otf


================================================
FILE: images/gujarati/gujarati-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-gujarati.md](../../opentype-shaping-gujarati.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-matra-decompose-before.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0ac9

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-matra-decompose-after.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0ac5,25cc,0abe

montage gujarati-matra-decompose-before.png right-arrow.png gujarati-matra-decompose-after.png -geometry +0+0 -background transparent gujarati-matra-decompose.png


## 2.7 Post-base consonants

> None


## 3.2 `nukt`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-nukt-before.png --features=-init,-nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a97,25cc,0abc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-nukt-after.png --features=-init,+nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a97,0abc

montage gujarati-nukt-before.png right-arrow.png gujarati-nukt-after.png -geometry +0+0 -background transparent gujarati-nukt.png

## 3.3 `akhn`

### KSsa

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-akhn-kssa-before.png --features=-init,-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a95,25cc,0acd,0ab7

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-akhn-kssa-after.png --features=-init,+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a95,0acd,0ab7

montage gujarati-akhn-kssa-before.png right-arrow.png gujarati-akhn-kssa-after.png -geometry +0+0 -background transparent gujarati-akhn-kssa.png

### JNya

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-akhn-jnya-before.png --features=-init,-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a9c,25cc,0acd,0a9e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-akhn-jnya-after.png --features=-init,+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a9c,0acd,0a9e

montage gujarati-akhn-jnya-before.png right-arrow.png gujarati-akhn-jnya-after.png -geometry +0+0 -background transparent gujarati-akhn-jnya.png


## 3.4 `rphf`


hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-rphf-before.png --features=-init,-rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0ab0,25cc,0acd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-rphf-after.png --features=-init,+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0ab0,0acd,25cc

montage gujarati-rphf-before.png right-arrow.png gujarati-rphf-after.png -geometry +0+0 -background transparent gujarati-rphf.png


## 3.5 `rkrf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-rkrf-before.png --features=-init,-rkrf,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0aa6,25cc,0acd,0ab0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-rkrf-after.png --features=-init,+rkrf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0aa6,0acd,0ab0

montage gujarati-rkrf-before.png right-arrow.png gujarati-rkrf-after.png -geometry +0+0 -background transparent gujarati-rkrf.png


## 3.7 `blwf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-blwf-before.png --features=-init,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=25cc,0acd,0ab0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-blwf-after.png --features=-init,+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=25cc,0acd,0ab0

montage gujarati-blwf-before.png right-arrow.png gujarati-blwf-after.png -geometry +0+0 -background transparent gujarati-blwf.png


## 3.9 `half`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-half-before.png --features=-init,-half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0aad,0acd,0ab0,25cc,0acd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-half-after.png --features=-init,+half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0aad,0acd,0ab0,0acd,25cc

montage gujarati-half-before.png right-arrow.png gujarati-half-after.png -geometry +0+0 -background transparent gujarati-half.png


## 3.11 `vatu`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-vatu-before.png --features=-init,-vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0aa4,25cc,0acd,0ab0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-vatu-after.png --features=-init,+vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0aa4,0acd,0ab0

montage gujarati-vatu-before.png right-arrow.png gujarati-vatu-after.png -geometry +0+0 -background transparent gujarati-vatu.png


## 3.12 `cjct`

> Note that Noto Serif Gujarati implements this in `pres` for unknown
> reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-cjct-before.png --features=-init,-pres,-cjct --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0aa6,25cc,0acd,0aae

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-cjct-after.png --features=-init,+cjct --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0aa6,0acd,0aae

montage gujarati-cjct-before.png right-arrow.png gujarati-cjct-after.png -geometry +0+0 -background transparent gujarati-cjct.png


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-matra-position-before.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0abf,0a9f,0acd,0a9d,0acd,0ab9,0acd,0aa4

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-matra-position-after.png --features=-init,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a9f,0acd,0a9d,0acd,0ab9,0acd,0aa4,0abf

montage gujarati-matra-position-before.png right-arrow.png gujarati-matra-position-after.png -geometry +0+0 -background transparent gujarati-matra-position.png


## 4.3 Reph position

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-reph-position-before.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0ab0,0acd,25cc,0aab,0acd,0aa8,0acd,0a9a,0ac2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-reph-position-after.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0ab0,0acd,0aab,0acd,0aa8,0acd,0a9a,0ac2

montage gujarati-reph-position-before.png right-arrow.png gujarati-reph-position-after.png -geometry +0+0 -background transparent gujarati-reph-position.png


## 5 `pres`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-pres-before.png --features=-init,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a9e,0acd,0a9a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-pres-after.png --features=-init,+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a9e,0acd,0a9a,25cc

montage gujarati-pres-before.png right-arrow.png gujarati-pres-after.png -geometry +0+0 -background transparent gujarati-pres.png


## 5 `abvs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-abvs-before.png --features=-init,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0ab0,0acd,0aa3,0abf

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-abvs-after.png --features=-init,+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0ab0,0acd,0aa3,0abf

montage gujarati-abvs-before.png right-arrow.png gujarati-abvs-after.png -geometry +0+0 -background transparent gujarati-abvs.png


## 5 `blws`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-blws-before.png --features=-init,-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0aa3,0ac1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-blws-after.png --features=-init,+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0aa3,0ac1

montage gujarati-blws-before.png right-arrow.png gujarati-blws-after.png -geometry +0+0 -background transparent gujarati-blws.png


## 5 `psts`

> Note: Noto Serif Gujarati implements this as an `abvs` lookup for
> unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-psts-before.png --features=-init,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a9c,0acd,0ab0,0abe

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-psts-after.png --features=-init,+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a9c,0acd,0ab0,0abe

montage gujarati-psts-before.png right-arrow.png gujarati-psts-after.png -geometry +0+0 -background transparent gujarati-psts.png


## 5 `haln`

> Note: Noto Serif Gujarati implements this as a `blwm` lookup.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-haln-after.png --features=-init,+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a95,0acd,0a95,0abc,0acd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-haln-before.png --features=-init,-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a95,0acd,0a95,0abc,0acd

montage gujarati-haln-before.png right-arrow.png gujarati-haln-after.png -geometry +0+0 -background transparent gujarati-haln.png


## 6 `abvm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-abvm-before.png --features=-init,-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0ab0,0acd,0ab9

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-abvm-after.png --features=-init,+abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0ab0,0acd,0ab9

montage gujarati-abvm-before.png right-arrow.png gujarati-abvm-after.png -geometry +0+0 -background transparent gujarati-abvm.png


## 6 `blwm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-blwm-before.png --features=-init,-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a9f,0acd,0aa0,0ac4

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-blwm-after.png --features=-init,+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.ttf --unicodes=0a9f,0acd,0aa0,0ac4

montage gujarati-blwm-before.png right-arrow.png gujarati-blwm-after.png -geometry +0+0 -background transparent gujarati-blwm.png


================================================
FILE: images/gujarati/gujarati-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-gujarati.md](../../opentype-shaping-gujarati.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-matra-decompose-before.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0ac9

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-matra-decompose-after.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0ac5,25cc,0abe

svg_stack.py --direction=h gujarati-matra-decompose-before.svg right-arrow.svg gujarati-matra-decompose-after.svg > gujarati-matra-decompose.svg


## 2.7 Post-base consonants

> None


## 3.2 `nukt`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-nukt-before.svg --features=-init,-nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0a97,25cc,0abc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-nukt-after.svg --features=-init,+nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0a97,0abc

svg_stack.py --direction=h gujarati-nukt-before.svg right-arrow.svg gujarati-nukt-after.svg > gujarati-nukt.svg

## 3.3 `akhn`

### KSsa

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-akhn-kssa-before.svg --features=-init,-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0a95,25cc,0acd,0ab7

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-akhn-kssa-after.svg --features=-init,+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0a95,0acd,0ab7

svg_stack.py --direction=h gujarati-akhn-kssa-before.svg right-arrow.svg gujarati-akhn-kssa-after.svg > gujarati-akhn-kssa.svg

### JNya

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-akhn-jnya-before.svg --features=-init,-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0a9c,25cc,0acd,0a9e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-akhn-jnya-after.svg --features=-init,+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0a9c,0acd,0a9e

svg_stack.py --direction=h gujarati-akhn-jnya-before.svg right-arrow.svg gujarati-akhn-jnya-after.svg > gujarati-akhn-jnya.svg


## 3.4 `rphf`


hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-rphf-before.svg --features=-init,-rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0ab0,25cc,0acd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-rphf-after.svg --features=-init,+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0ab0,0acd,25cc

svg_stack.py --direction=h gujarati-rphf-before.svg right-arrow.svg gujarati-rphf-after.svg > gujarati-rphf.svg


## 3.5 `rkrf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-rkrf-before.svg --features=-init,-rkrf,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0aa6,25cc,0acd,0ab0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-rkrf-after.svg --features=-init,+rkrf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0aa6,0acd,0ab0

svg_stack.py --direction=h gujarati-rkrf-before.svg right-arrow.svg gujarati-rkrf-after.svg > gujarati-rkrf.svg


## 3.7 `blwf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-blwf-before.svg --features=-init,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=25cc,0acd,0ab0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-blwf-after.svg --features=-init,+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=25cc,0acd,0ab0

svg_stack.py --direction=h gujarati-blwf-before.svg right-arrow.svg gujarati-blwf-after.svg > gujarati-blwf.svg


## 3.9 `half`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-half-before.svg --features=-init,-half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0aad,0acd,0ab0,25cc,0acd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-half-after.svg --features=-init,+half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0aad,0acd,0ab0,0acd,25cc

svg_stack.py --direction=h gujarati-half-before.svg right-arrow.svg gujarati-half-after.svg > gujarati-half.svg


## 3.11 `vatu`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-vatu-before.svg --features=-init,-vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0aa4,25cc,0acd,0ab0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-vatu-after.svg --features=-init,+vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0aa4,0acd,0ab0

svg_stack.py --direction=h gujarati-vatu-before.svg right-arrow.svg gujarati-vatu-after.svg > gujarati-vatu.svg


## 3.12 `cjct`

> Note that Noto Serif Gujarati implements this in `pres` for unknown
> reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-cjct-before.svg --features=-init,-pres,-cjct --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0aa6,25cc,0acd,0aae

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-cjct-after.svg --features=-init,+cjct --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0aa6,0acd,0aae

svg_stack.py --direction=h gujarati-cjct-before.svg right-arrow.svg gujarati-cjct-after.svg > gujarati-cjct.svg


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-matra-position-before.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=25cc,0abf,0a9b,0acd,0aad,0acd,0aa6,0acd,0aae

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-matra-position-after.svg --features=-init,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0a9b,0acd,0aad,0acd,0aa6,0acd,0aae,0abf

svg_stack.py --direction=h gujarati-matra-position-before.svg right-arrow.svg gujarati-matra-position-after.svg > gujarati-matra-position.svg


## 4.3 Reph position

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-reph-position-before.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0ab0,0acd,25cc,0aab,0acd,0aa8,0acd,0a9a,0ac2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-reph-position-after.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0ab0,0acd,0aab,0acd,0aa8,0acd,0a9a,0ac2

svg_stack.py --direction=h gujarati-reph-position-before.svg right-arrow.svg gujarati-reph-position-after.svg > gujarati-reph-position.svg


## 5 `pres`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-pres-before.svg --features=-init,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0a9e,0acd,0a9a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-pres-after.svg --features=-init,+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0a9e,0acd,0a9a,25cc

svg_stack.py --direction=h gujarati-pres-before.svg right-arrow.svg gujarati-pres-after.svg > gujarati-pres.svg


## 5 `abvs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-abvs-before.svg --features=-init,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0ab0,0acd,0aa3,0abf

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-abvs-after.svg --features=-init,+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0ab0,0acd,0aa3,0abf

svg_stack.py --direction=h gujarati-abvs-before.svg right-arrow.svg gujarati-abvs-after.svg > gujarati-abvs.svg


## 5 `blws`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-blws-before.svg --features=-init,-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0aab,0ac1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-blws-after.svg --features=-init,+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0aab,0ac1

svg_stack.py --direction=h gujarati-blws-before.svg right-arrow.svg gujarati-blws-after.svg > gujarati-blws.svg


## 5 `psts`

> Note: Noto Serif Gujarati implements this as an `abvs` lookup for
> unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-psts-before.svg --features=-init,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0a9c,0acd,0ab0,0abe

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-psts-after.svg --features=-init,+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0a9c,0acd,0ab0,0abe

svg_stack.py --direction=h gujarati-psts-before.svg right-arrow.svg gujarati-psts-after.svg > gujarati-psts.svg


## 5 `haln`

> Note: Noto Serif Gujarati implements this as a `blwm` lookup in
> addition to `haln`.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-haln-after.svg --features=-init,+haln,+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0aa0,0acd

hb-view --font-size=110 --margin=2,24,2,16 --output-file=gujarati-haln-before.svg --features=-init,-haln,-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0aa0,0acd

svg_stack.py --direction=h gujarati-haln-before.svg right-arrow.svg gujarati-haln-after.svg > gujarati-haln.svg


## 6 `abvm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-abvm-before.svg --features=-init,-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0ab0,0acd,0ab9

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-abvm-after.svg --features=-init,+abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0ab0,0acd,0ab9

svg_stack.py --direction=h gujarati-abvm-before.svg right-arrow.svg gujarati-abvm-after.svg > gujarati-abvm.svg


## 6 `blwm`

hb-view --font-size=110 --margin=2,48,16,16 --output-file=gujarati-blwm-before.svg --features=-init,-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0a9b,0acd,0ab0,0ae3

hb-view --font-size=110 --margin=2,16,16,16 --output-file=gujarati-blwm-after.svg --features=-init,+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0a9b,0acd,0ab0,0ae3

svg_stack.py --direction=h gujarati-blwm-before.svg right-arrow.svg gujarati-blwm-after.svg > gujarati-blwm.svg

## 6 `dist`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-dist-before.svg --features=-init,-dist --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0ab9,0acd,0aa3,0aa6,0acd,0ab5

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gujarati-dist-after.svg --features=-init,+dist --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGujarati-Regular.otf --unicodes=0ab9,0acd,0aa3,0aa6,0acd,0ab5

svg_stack.py --direction=h gujarati-dist-before.svg right-arrow.svg gujarati-dist-after.svg > gujarati-dist.svg


================================================
FILE: images/gurmukhi/gurmukhi-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-gurmukhi.md](../../opentype-shaping-gurmukhi.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


> Note: always use `--features=-init` in examples where the `init`
> feature itself is not being explained.

> Note: There is, at present, no Noto Serif for Gurmukhi; therefore
> (unlike the other Indic scripts) these examples use Noto Sans. Serif
> would be preferable if it appears in the future, though, due to the
> increased stroke contrast.

## 2.7 Post-base consonants

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-pstf-before.png --features=-init,-pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=25cc,0a4d,0a2f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-pstf-after.png --features=-init,+pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=25cc,0a4d,0a2f

montage gurmukhi-pstf-before.png right-arrow.png gurmukhi-pstf-after.png -geometry +0+0 -background transparent gurmukhi-pstf.png


## 3.2 `nukt`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-nukt-before.png --features=-init,-nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=0a38,25cc,0a3c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-nukt-after.png --features=-init,+nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=0a38,0a3c

montage gurmukhi-nukt-before.png right-arrow.png gurmukhi-nukt-after.png -geometry +0+0 -background transparent gurmukhi-nukt.png


## 3.3 `akhn`

> Note: Noto Sans Gurmukhi has no `akhn` feature implemented.


## 3.4 `rphf`

> Note: Noto Sans Gurmukhi has no `rphf` feature implemented.


## 3.7 `blwf`

### Ra

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwf-ra-before.png --features=-init,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=25cc,0a4d,0a30

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwf-ra-after.png --features=-init,+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=25cc,0a4d,0a30

montage gurmukhi-blwf-ra-before.png right-arrow.png gurmukhi-blwf-ra-after.png -geometry +0+0 -background transparent gurmukhi-blwf-ra.png

### Va

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwf-va-before.png --features=-init,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=25cc,0a4d,0a35

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwf-va-after.png --features=-init,+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=25cc,0a4d,0a35

montage gurmukhi-blwf-va-before.png right-arrow.png gurmukhi-blwf-va-after.png -geometry +0+0 -background transparent gurmukhi-blwf-va.png

### Ha

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwf-ha-before.png --features=-init,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=25cc,0a4d,0a39

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwf-ha-after.png --features=-init,+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=25cc,0a4d,0a39

montage gurmukhi-blwf-ha-before.png right-arrow.png gurmukhi-blwf-ha-after.png -geometry +0+0 -background transparent gurmukhi-blwf-ha.png


## 3.9 `half`

> Note: Gurmukhi fonts seem to stick to explicit halant-forms.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-half-before.png --features=-init,-half,-haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=0a2d,0a4d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-half-after.png --features=-init,+half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=0a2d,0a4d

montage gurmukhi-half-before.png right-arrow.png gurmukhi-half-after.png -geometry +0+0 -background transparent gurmukhi-half.png


## 3.10 `pstf`

> Same as 2.7


## 3.11 `vatu`

> Note: Noto Gurmukhi has no `vatu` feature.


## 3.12 `cjct`

> Note: Noto Gurmukhi has no `cjct` feature.


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-matra-position-before.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=0a25,0a3f,0a4d,0a32,0a4d,0a35,0a4d,0a1a

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-matra-position-after.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=0a25,0a4d,0a32,0a4d,0a35,0a4d,0a1a,0a3f

montage gurmukhi-matra-position-before.png right-arrow.png gurmukhi-matra-position-after.png -geometry +0+0 -background transparent gurmukhi-matra-position.png


## 4.3 Reph position

> Note: Noto Gurmukhi has no `rphf` feature and no Reph
> glyph. Therefore no illustration of Reph positioning is possible.


## 5 `init`

> Note: Noto Gurmukhi has no `init` feature, and it is unclear from
> the Microsoft specification whether `init` is defined for Gurmukhi.


## 5 `pres`

> Note: Noto Gurmukhi has no `pres` feature, even though it would be
> possible to implement one for the i-matra (`U+0A3F`).


## 5 `abvs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-abvs-before.png --features=-init,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=0a13,0a71

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-abvs-after.png --features=-init,+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=0a13,0a71

montage gurmukhi-abvs-before.png right-arrow.png gurmukhi-abvs-after.png -geometry +0+0 -background transparent gurmukhi-abvs.png


## 5 `blws`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blws-before.png --features=-init,-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhiUI-Regular.ttf --unicodes=0a15,25cc,0a4d,0a30

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blws-after.png --features=-init,+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhiUI-Regular.ttf --unicodes=0a15,0a4d,0a30

montage gurmukhi-blws-before.png right-arrow.png gurmukhi-blws-after.png -geometry +0+0 -background transparent gurmukhi-blws.png


## 5 `psts`

> Note: Noto Sans Gurmukhi has no `psts` feature.


## 5 `haln`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-haln-before.png --features=-init,-haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=0a32,0a3c,0a4d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-haln-after.png --features=-init,+haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=0a32,0a3c,0a4d

montage gurmukhi-haln-before.png right-arrow.png gurmukhi-haln-after.png -geometry +0+0 -background transparent gurmukhi-haln.png


## 6 `abvm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-abvm-before.png --features=-init,-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=0a20,0a48

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-abvm-after.png --features=-init,+abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=0a20,0a48

montage gurmukhi-abvm-before.png right-arrow.png gurmukhi-abvm-after.png -geometry +0+0 -background transparent gurmukhi-abvm.png


## 6 `blwm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwm-before.png --features=-init,-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=0a06,0a42

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwm-after.png --features=-init,+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.ttf --unicodes=0a06,0a42

montage gurmukhi-blwm-before.png right-arrow.png gurmukhi-blwm-after.png -geometry +0+0 -background transparent gurmukhi-blwm.png


================================================
FILE: images/gurmukhi/gurmukhi-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-gurmukhi.md](../../opentype-shaping-gurmukhi.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


> Note: always use `--features=-init` in examples where the `init`
> feature itself is not being explained.

> Note: There is, at present, no Noto Serif for Gurmukhi; therefore
> (unlike the other Indic scripts) these examples use Noto Sans. Serif
> would be preferable if it appears in the future, though, due to the
> increased stroke contrast.

## 2.7 Post-base consonants

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-pstf-before.svg --features=-init,-pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=25cc,0a4d,0a2f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-pstf-after.svg --features=-init,+pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=25cc,0a4d,0a2f

svg_stack.py --direction=h gurmukhi-pstf-before.svg right-arrow.svg gurmukhi-pstf-after.svg > gurmukhi-pstf.svg

#### Duplicates for other subsections

cp gurmukhi-pstf.svg gurmukhi-pstf-1.svg


## 3.2 `nukt`

> Noto Serif replaces "Dda,Nukta" with "Rra". That sequence is chosen
> because it means the change in glyphs is easily seen, but perhaps it
> would be better to use an example that has an "-after" form that is
> more clearly nukta-bearing?

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-nukt-before.svg --features=-init,-nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a21,25cc,0a3c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-nukt-after.svg --features=-init,+nukt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a21,0a3c

svg_stack.py --direction=h gurmukhi-nukt-before.svg right-arrow.svg gurmukhi-nukt-after.svg > gurmukhi-nukt.svg


## 3.3 `akhn`

> Note: Noto Sans/Serif Gurmukhi have no `akhn` feature implemented.


## 3.4 `rphf`

> Note: Noto Sans/Serif Gurmukhi have no `rphf` feature implemented.


## 3.7 `blwf`

### Ra

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwf-ra-before.svg --features=-init,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=25cc,0a4d,0a30

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwf-ra-after.svg --features=-init,+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=25cc,0a4d,0a30

svg_stack.py --direction=h gurmukhi-blwf-ra-before.svg right-arrow.svg gurmukhi-blwf-ra-after.svg > gurmukhi-blwf-ra.svg

### Va

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwf-va-before.svg --features=-init,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=25cc,0a4d,0a35

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwf-va-after.svg --features=-init,+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=25cc,0a4d,0a35

svg_stack.py --direction=h gurmukhi-blwf-va-before.svg right-arrow.svg gurmukhi-blwf-va-after.svg > gurmukhi-blwf-va.svg

### Ha

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwf-ha-before.svg --features=-init,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=25cc,0a4d,0a39

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwf-ha-after.svg --features=-init,+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=25cc,0a4d,0a39

svg_stack.py --direction=h gurmukhi-blwf-ha-before.svg right-arrow.svg gurmukhi-blwf-ha-after.svg > gurmukhi-blwf-ha.svg


## 3.9 `half`

> Note: Gurmukhi fonts seem to stick to explicit halant-forms.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-half-before.svg --features=-init,-half,-mark,-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a23,0a4d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-half-after.svg --features=-init,+half,+haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a23,0a4d

svg_stack.py --direction=h gurmukhi-half-before.svg right-arrow.svg gurmukhi-half-after.svg > gurmukhi-half.svg


## 3.10 `pstf`

> Same as 2.7


## 3.11 `vatu`

> Note: Noto Gurmukhi has no `vatu` feature.


## 3.12 `cjct`

> Note: Noto Gurmukhi has no `cjct` feature.


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,24,16 --output-file=gurmukhi-matra-position-before.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a25,0a3f,0a4d,0a32,0a4d,0a35,0a4d,0a1a

hb-view --font-size=110 --margin=2,16,24,16 --output-file=gurmukhi-matra-position-after.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a25,0a4d,0a32,0a4d,0a35,0a4d,0a1a,0a3f

svg_stack.py --direction=h gurmukhi-matra-position-before.svg right-arrow.svg gurmukhi-matra-position-after.svg > gurmukhi-matra-position.svg


## 4.3 Reph position

> Note: Noto Gurmukhi has no `rphf` feature and no Reph
> glyph. Therefore no illustration of Reph positioning is possible.


## 5 `init`

> Note: Noto Gurmukhi has no `init` feature, and it is unclear from
> the Microsoft specification whether `init` is defined for Gurmukhi.


## 5 `pres`

> Note: Noto Gurmukhi has no `pres` feature, even though it would be
> possible to implement one for the i-matra (`U+0A3F`).


## 5 `abvs`

hb-view --font-size=110 --margin=2,32,2,16 --output-file=gurmukhi-abvs-before.svg --features=-init,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a13,0a71

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-abvs-after.svg --features=-init,+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a13,0a71

svg_stack.py --direction=h gurmukhi-abvs-before.svg right-arrow.svg gurmukhi-abvs-after.svg > gurmukhi-abvs.svg


## 5 `blws`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blws-before.svg --features=-init,-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a15,25cc,0a4d,0a30

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blws-after.svg --features=-init,+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a15,0a4d,0a30

svg_stack.py --direction=h gurmukhi-blws-before.svg right-arrow.svg gurmukhi-blws-after.svg > gurmukhi-blws.svg


## 5 `psts`

> Note: Noto Sans Gurmukhi has no `psts` feature.


## 5 `haln`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-haln-before.svg --features=-init,-haln,-half,-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a5c,0a4d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-haln-after.svg --features=-init,+haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a5c,0a4d

svg_stack.py --direction=h gurmukhi-haln-before.svg right-arrow.svg gurmukhi-haln-after.svg > gurmukhi-haln.svg


## 6 `abvm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-abvm-before.svg --features=-init,-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a20,0a48

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-abvm-after.svg --features=-init,+abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a20,0a48

svg_stack.py --direction=h gurmukhi-abvm-before.svg right-arrow.svg gurmukhi-abvm-after.svg > gurmukhi-abvm.svg


## 6 `blwm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwm-before.svg --features=-init,-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a17,0a51

hb-view --font-size=110 --margin=2,16,2,16 --output-file=gurmukhi-blwm-after.svg --features=-init,+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifGurmukhi-Regular.otf --unicodes=0a17,0a51

svg_stack.py --direction=h gurmukhi-blwm-before.svg right-arrow.svg gurmukhi-blwm-after.svg > gurmukhi-blwm.svg


================================================
FILE: images/hangul/hangul-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-hangul.md](../../opentype-shaping-hangul.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


> Note: always use `--features=-ljmo,-vjmo,-tjmo` in examples where
> the jamo features are not being explained. This may not be
> necessary in all fonts.


## LV example

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-lv-syllable.png --features=+ljmo,+vjmo,+tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110e,1166


## LVT example

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-lvt-syllable.png --features=+ljmo,+vjmo,+tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110e,1166,11ae


## 3. Compose the syllable

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-compose-before.png --features=-ljmo,-vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=1108,200b,1171,200b,11b8

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-compose-after.png --features=+ljmo,+vjmo,+tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=1108,1171,11b8

montage hangul-compose-before.png right-arrow.png hangul-compose-after.png -geometry +0+0 -background transparent hangul-compose.png


## 4. Decompose the syllable

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-decompose-before.png --features=+ljmo,+vjmo,+tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=1106,1172,11af

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-decompose-after.png --features=-ljmo,-vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=1106,200d,1172,200d,11af

montage hangul-decompose-before.png right-arrow.png hangul-decompose-after.png -geometry +0+0 -background transparent hangul-decompose.png


## 5.2 `ljmo`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-ljmo-before.png --features=-ljmo,-vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110f,200b,1169,200b,d7d9

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-ljmo-after.png --features=+ljmo,-vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110f,200b,1169,200b,d7d9

montage hangul-ljmo-before.png right-arrow.png hangul-ljmo-after.png -geometry +0+0 -background transparent hangul-ljmo.png


## 5.3 `vjmo`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-vjmo-before.png --features=+ljmo,-vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110f,200b,1169,200b,d7d9

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-vjmo-after.png --features=+ljmo,+vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110f,200b,1169,200b,d7d9

montage hangul-vjmo-before.png right-arrow.png hangul-vjmo-after.png -geometry +0+0 -background transparent hangul-vjmo.png


## 5.4 `tjmo`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-tjmo-before.png --features=+ljmo,+vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110f,200b,1169,200b,d7d9

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-tjmo-after.png --features=+ljmo,+vjmo,+tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110f,200b,1169,200b,d7d9

montage hangul-tjmo-before.png right-arrow.png hangul-tjmo-after.png -geometry +0+0 -background transparent hangul-tjmo.png


## 6. Tone marks

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-tone-before.png --features=+ljmo,+vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=1111,116b,11a8,200c,302f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-tone-after.png --features=+ljmo,+vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=1111,116b,11a8,302f


================================================
FILE: images/hangul/hangul-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-hangul.md](../../opentype-shaping-hangul.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


> Note: always use `--features=-ljmo,-vjmo,-tjmo` in examples where
> the jamo features are not being explained. This may not be
> necessary in all fonts.


## LV example

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-lv-syllable.svg --features=+ljmo,+vjmo,+tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110e,1166


## LVT example

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-lvt-syllable.svg --features=+ljmo,+vjmo,+tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110e,1166,11ae


## 3. Compose the syllable

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-compose-before.svg --features=-ljmo,-vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=1108,200b,1171,200b,11b8

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-compose-after.svg --features=+ljmo,+vjmo,+tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=1108,1171,11b8

svg_stack --direction=h hangul-compose-before.svg right-arrow.svg hangul-compose-after.svg > hangul-compose.svg


## 4. Decompose the syllable

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-decompose-before.svg --features=+ljmo,+vjmo,+tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=1106,1172,11af

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-decompose-after.svg --features=-ljmo,-vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=3141,200b,3160,200b,3139

svg_stack --direction=h hangul-decompose-before.svg right-arrow.svg hangul-decompose-after.svg > hangul-decompose.svg


## 5.2 `ljmo`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-ljmo-before.svg --features=-ljmo,-vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110f,200b,1169,200b,11bb

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-ljmo-after.svg --features=+ljmo,-vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110f,200b,1169,200b,11bb

svg_stack --direction=h hangul-ljmo-before.svg right-arrow.svg hangul-ljmo-after.svg > hangul-ljmo.svg


## 5.3 `vjmo`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-vjmo-before.svg --features=+ljmo,-vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110f,200b,1169,200b,11bb

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-vjmo-after.svg --features=+ljmo,+vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110f,1169,200b,11bb

svg_stack --direction=h hangul-vjmo-before.svg right-arrow.svg hangul-vjmo-after.svg > hangul-vjmo.svg


## 5.4 `tjmo`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-tjmo-before.svg --features=+ljmo,+vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110f,1169,200b,11bb

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-tjmo-after.svg --features=+ljmo,+vjmo,+tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=110f,1169,11bb

svg_stack --direction=h hangul-tjmo-before.svg right-arrow.svg hangul-tjmo-after.svg > hangul-tjmo.svg


## 6. Tone marks

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-tone-before.svg --features=+ljmo,+vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=1111,116b,11a8,200c,302f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hangul-tone-after.svg --features=+ljmo,+vjmo,-tjmo --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifKR-Regular.otf --unicodes=1111,116b,11a8,302f

svg_stack.py --direction=h hangul-tone-before.svg right-arrow.svg hangul-tone-after.svg > hangul-tone.svg


================================================
FILE: images/hebrew/hebrew-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-hebrew.md](../../opentype-shaping-hebrew.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## 1.1 `ccmp`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-ccmp-before.png --features=-ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.ttf --unicodes=05b3,05bd

hb-view --font-size=110 --margin=2,24,2,24 --output-file=hebrew-ccmp-after.png --features=+ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.ttf --unicodes=05b3,05bd

montage hebrew-ccmp-before.png right-arrow.png hebrew-ccmp-after.png -geometry +0+0 -background transparent hebrew-ccmp.png


## 2 Alphabetic Presentation Forms

> Note: Noto Sans Hebrew implements these compositions in a `ccmp` lookup.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-apf-before.png --features=-ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.ttf --unicodes=05e7,05bc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-apf-after.png --features=+ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.ttf --unicodes=05e7,05bc

montage hebrew-apf-before.png right-arrow.png hebrew-apf-after.png -geometry +0+0 -background transparent hebrew-apf.png


## 4.1 `liga`

hb-view --font-size=110 --margin=2,16,2,24 --output-file=hebrew-liga-before.png --features=-liga --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.ttf --unicodes=fb4f,05b1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-liga-after.png --features=+liga --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.ttf --unicodes=fb4f,05b1

montage hebrew-liga-before.png right-arrow.png hebrew-liga-after.png -geometry +0+0 -background transparent hebrew-liga.png


## 4.2 `dlig`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-dlig-before.png --features=-dlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.ttf --unicodes=05d0,05dc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-dlig-after.png --features=+dlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.ttf --unicodes=05d0,05dc

montage hebrew-dlig-before.png right-arrow.png hebrew-dlig-after.png -geometry +0+0 -background transparent hebrew-dlig.png


## 5.1 `kern`

> Note: Noto Sans Hebrew has `kern` lookups, but so far I have not
> been able to identify an easily visible example pair.


## 5.2 `mark`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-mark-before.png --features=-mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.ttf --unicodes=05e7,059a

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-mark-after.png --features=+mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.ttf --unicodes=05e7,059a

montage hebrew-mark-before.png right-arrow.png hebrew-mark-after.png -geometry +0+0 -background transparent hebrew-mark.png


================================================
FILE: images/hebrew/hebrew-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-hebrew.md](../../opentype-shaping-hebrew.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## 1.1 `ccmp`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-ccmp-before.svg --features=-ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.otf --unicodes=05d5,05c1

hb-view --font-size=110 --margin=2,24,2,24 --output-file=hebrew-ccmp-after.svg --features=+ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.otf --unicodes=05d5,05c1

svg_stack.py --direction=h hebrew-ccmp-before.svg right-arrow.svg hebrew-ccmp-after.svg > hebrew-ccmp.svg


## 2 Alphabetic Presentation Forms

> Note: Noto Sans Hebrew implements these compositions in a `ccmp`
> lookup. Noto Serif Hebrew does not.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-apf-before.svg --features=-ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.otf --unicodes=05d9,05b4

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-apf-after.svg --features=+ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.otf --unicodes=fb1d

svg_stack.py --direction=h hebrew-apf-before.svg right-arrow.svg hebrew-apf-after.svg > hebrew-apf.svg


## 4.1 `liga`

hb-view --font-size=110 --margin=2,16,2,24 --output-file=hebrew-liga-before.svg --features=-liga --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.otf --unicodes=25cc,05b1,200d,05bd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-liga-after.svg --features=+liga --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.otf --unicodes=25cc,05b1,200d,05bd

svg_stack.py --direction=h hebrew-liga-before.svg right-arrow.svg hebrew-liga-after.svg > hebrew-liga.svg


## `calt`

> The `calt` feature clearly gets applied; it's unclear why it was
> left off of the list in the initial release of this document.

hb-view --font-size=110 --margin=2,16,2,24 --output-file=hebrew-calt-before.svg --features=-calt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.otf --unicodes=05dc,05b4,05b8,05dd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-calt-after.svg --features=+calt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.otf --unicodes=05dc,05b4,05b8,05dd

svg_stack.py --direction=h hebrew-calt-before.svg right-arrow.svg hebrew-calt-after.svg > hebrew-calt.svg


## 4.2 `dlig`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-dlig-before.svg --features=-dlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.otf --unicodes=05d0,05dc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-dlig-after.svg --features=+dlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.otf --unicodes=05d0,05dc

svg_stack.py --direction=h hebrew-dlig-before.svg right-arrow.svg hebrew-dlig-after.svg > hebrew-dlig.svg


## 5.1 `kern`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-kern-before.svg --features=-kern --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.otf --unicodes=05d2,05e2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-kern-after.svg --features=+kern --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.otf --unicodes=05d2,05e2

svg_stack.py --direction=h hebrew-kern-before.svg right-arrow.svg hebrew-kern-after.svg > hebrew-kern.svg


## 5.2 `mark`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-mark-before.svg --features=-mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.otf --unicodes=05d3,05b1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=hebrew-mark-after.svg --features=+mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifHebrew-Regular.otf --unicodes=05d3,05b1

svg_stack.py --direction=h hebrew-mark-before.svg right-arrow.svg hebrew-mark-after.svg > hebrew-mark.svg


================================================
FILE: images/images-index.md
================================================

# Images #

This section includes a separate subdirectory for each script,
containing the images included in the relevant script-shaping document.

Also included in each directory is a log file containing the exact
commands used to generate the images.

<abbr title="Portable Network Graphics">PNG</abbr> glyph images are generated using the `hb-view` utility from
HarfBuzz and the `montage` utility from ImageMagick. The commands were
run on a Linux-based system but, apart from minor differences in the
file-path to the font file specified, should be completely
reproducible on other operating systems.

<abbr title="Scalable Vector Graphics">SVG</abbr> glyph images are generated using the `hb-view` utility from
HarfBuzz and the [`svg_stack`](https://github.com/astraw/svg_stack/)
Python utility. The commands were run on a Linux-based system but,
apart from minor differences in the file-path to the font file
specified, should be completely reproducible on other operating
systems.

Long-term, the <abbr title="Portable Network Graphics">PNG</abbr> images will be replaced by <abbr title="Scalable Vector Graphics">SVG</abbr> images &mdash;
although, at present, there are still some images that are generated
in <abbr title="Portable Network Graphics">PNG</abbr> form (because kinks remain to be worked out in the <abbr title="Scalable Vector Graphics">SVG</abbr>-image
alignment process and the corresponding CSS styling).

The font files used must be publicly and freely available, open-source
fonts. By default, the Noto fonts from Google are the starting point.

A list of the fonts used to generate the latest version of the images
is provided in the [example-fonts.txt](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/example-fonts.txt) file, with
URLs and <abbr title="Secure Hash Algorithm">SHA</abbr> checksums for each file.

The image file names follow a simple, but important, pattern:

    _script_-_featureillustrated_.png
	
Intermediary images copy the pattern but append _-before_ or _-after_
when depicting the before-or-after state of an applied OpenType
feature, or some other suffix as appropriate.

If you are suggesting an update to an image, please utilize the same
commands and general syntax. If you are suggesting adding a new image,
please also follow the file-name pattern. Patches to the image-generation log for
each script are appreciated, in order to keep the log up-to-date.

  - Indic
      - [Devanagari](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/devanagari/devanagari-svg-image-generation-log.md)
      - [Bengali](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/bengali/bengali-svg-image-generation-log.md)
      - [Gujarati](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/gujarati/gujarati-svg-image-generation-log.md)
      - [Gurmukhi](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/gurmukhi/gurmukhi-svg-image-generation-log.md)
      - [Kannada](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/kannada/kannada-svg-image-generation-log.md)
      - [Malayalam](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/malayalam/malayalam-svg-image-generation-log.md)
      - [Oriya](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/oriya/oriya-svg-image-generation-log.md)
      - [Tamil](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/tamil/tamil-svg-image-generation-log.md)
      - [Telugu](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/telugu/telugu-svg-image-generation-log.md)
      - [Sinhala](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/sinhala/sinhala-svg-image-generation-log.md)
  - Brahmi-derived
      - [Khmer](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/khmer/khmer-svg-image-generation-log.md)
      - [Lao](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/thai-lao/thai-lao-svg-image-generation-log.md)
      - [Myanmar](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/myanmar/myanmar-svg-image-generation-log.md)
      - [Thai](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/thai-lao/thai-lao-svg-image-generation-log.md)
      - [Tibetan](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/tibetan/tibetan-svg-image-generation-log.md)
  - Arabic
      - [Arabic](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/arabic/arabic-svg-image-generation-log.md)
      - [Syriac](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/syriac/syriac-svg-image-generation-log.md)
      - [N'Ko](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/nko/nko-svg-image-generation-log.md)
      - [Mongolian](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/mongolian/mongolian-svg-image-generation-log.md)
  - Hangul
      - [Hangul](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/hangul/hangul-svg-image-generation-log.md)
  - Hebrew
      - [Hebrew](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/hebrew/hebrew-svg-image-generation-log.md)
  - Emoji
      - [Emoji](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/emoji/emoji-png-image-generation-log.md)


================================================
FILE: images/kannada/kannada-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-kannada.md](../../opentype-shaping-kannada.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-matra-decomposition-before.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0cc8

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-matra-decomposition-after.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0cc6,25cc,0cd6

montage kannada-matra-decomposition-before.png right-arrow.png kannada-matra-decomposition-after.png -geometry +0+0 -background transparent kannada-matra-decomposition.png


## 3.2 `nukt`

> Note: Noto Serif Kannada implements this in `blwm` for unknown
> reasons.

hb-view --font-size=110 --margin=2,32,2,16 --output-file=kannada-nukt-before.png --features=-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0cab,25cc,0cbc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-nukt-after.png  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0cab,0cbc

montage kannada-nukt-before.png right-arrow.png kannada-nukt-after.png -geometry +0+0 -background transparent kannada-nukt.png


## 3.3 `akhn`

> Note: Noto Serif Kannada implements this in both `akhn` and in
> `blwf` for unknown reasons.

### KSsa

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-akhn-kssa-before.png --features=-akhn,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0c95,0ccd,0cb7

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-akhn-kssa-after.png --features=+akhn, --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0c95,0ccd,0cb7

montage kannada-akhn-kssa-before.png right-arrow.png kannada-akhn-kssa-after.png -geometry +0+0 -background transparent kannada-akhn-kssa.png


### JNya

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-akhn-jnya-before.png --features=-akhn,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0c9c,0ccd,0c9e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-akhn-jnya-after.png --features=+akhn, --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0c9c,0ccd,0c9e

montage kannada-akhn-jnya-before.png right-arrow.png kannada-akhn-jnya-after.png -geometry +0+0 -background transparent kannada-akhn-jnya.png


## 3.4 `rphf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-rphf-before.png --features=-rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0cb0,0ccd,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-rphf-after.png --features=+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0cb0,0ccd,25cc

montage kannada-rphf-before.png right-arrow.png kannada-rphf-after.png -geometry +0+0 -background transparent kannada-rphf.png


## 3.7 `blwf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-blwf-before.png --features=-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=25cc,0ccd,0ca1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-blwf-after.png --features=+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=25cc,0ccd,0ca1

montage kannada-blwf-before.png right-arrow.png kannada-blwf-after.png -geometry +0+0 -background transparent kannada-blwf.png


## 4.3 Reph positioning

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-reph-position-before.png  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0cb0,0ccd,25cc,0cad,0ccd,0cb3,0cc2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-reph-position-after.png  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0cb0,0ccd,0cad,0ccd,0cb3,0cc2

montage kannada-reph-position-before.png right-arrow.png kannada-reph-position-after.png -geometry +0+0 -background transparent kannada-reph-position.png


## 5 `pres`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-pres-before.png --features=-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0cb5,25cc,0cc1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-pres-after.png --features=+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0cb5,0cc1

montage kannada-pres-before.png right-arrow.png kannada-pres-after.png -geometry +0+0 -background transparent kannada-pres.png


## 5 `abvs`

> Note: Noto Serif Kannada has some abvs-like substitutions in `pres`
> lookup 14 (via single-sub lookup 23), but I have not yet figured out
> whether they are,
> linguistically speaking, actually above-base features. Thus, they are
> included here, but might not be used in the shaping document.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-abvs-before.png --features=-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0ca3,0ccc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-abvs-after.png --features=+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0ca3,0ccc

montage kannada-abvs-before.png right-arrow.png kannada-abvs-after.png -geometry +0+0 -background transparent kannada-abvs.png


## 5 `blws`

> Note: Noto Serif Kannada has some blws-like substitutions in
> `pres` lookup 12 (via contextual chaining lookups 7 and 8 (via
> single-sub lookups 19 and 20)), but I have not yet figured out
> whether they are, linguistically speaking, actually below-base
> features. Thus, they are included here, but might not be used in the
> shaping document.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-blws-before.png --features=-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0c95,0ccd,0cb7,0cc1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-blws-after.png --features=+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0c95,0ccd,0cb7,0cc1

montage kannada-blws-before.png right-arrow.png kannada-blws-after.png -geometry +0+0 -background transparent kannada-blws.png

## 5 `psts`

> Note: Noto Serif Kannada has some psts-like lookups in `pres` lookup 12 (via single-sub lookups 21 and 22 (via contextual chaining lookups
> 9 and 10)), but I have not yet figured out whether they are,
> linguistically speaking, actually post-base features. Thus, they are
> included here, but might not be used in the shaping document.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-psts-before.png --features=-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0c95,0cbe

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-psts-after.png --features=+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0c95,0cbe

montage kannada-psts-before.png right-arrow.png kannada-psts-after.png -geometry +0+0 -background transparent kannada-psts.png


## 5 `haln`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-haln-before.png --features=-haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0c98,0ccd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-haln-after.png --features=+haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0c98,0ccd

montage kannada-haln-before.png right-arrow.png kannada-haln-after.png -geometry +0+0 -background transparent kannada-haln.png


## 6 `abvm`

> Note: Noto Serif Kannada does not include an `abvm` feature.


## 6 `blwm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-blwm-before.png --features=-blwm,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0cab,0cc1,0cbc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-blwm-after.png --features=+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.ttf --unicodes=0cab,0cc1,0cbc

montage kannada-blwm-before.png right-arrow.png kannada-blwm-after.png -geometry +0+0 -background transparent kannada-blwm.png


================================================
FILE: images/kannada/kannada-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-kannada.md](../../opentype-shaping-kannada.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-matra-decomposition-before.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0cc8

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-matra-decomposition-after.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0cc6,25cc,0cd6

svg_stack.py --direction=h kannada-matra-decomposition-before.svg right-arrow.svg kannada-matra-decomposition-after.svg > kannada-matra-decomposition.svg


## 3.2 `nukt`

> Note: Noto Serif Kannada implements this in `blwm` for unknown
> reasons.

hb-view --font-size=110 --margin=2,32,2,16 --output-file=kannada-nukt-before.svg --features=-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0cab,25cc,0cbc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-nukt-after.svg  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0cab,0cbc

svg_stack.py --direction=h kannada-nukt-before.svg right-arrow.svg kannada-nukt-after.svg > kannada-nukt.svg


## 3.3 `akhn`

> Note: Noto Serif Kannada implements this in both `akhn` and in
> `blwf` for unknown reasons.

### KSsa

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-akhn-kssa-before.svg --features=-akhn,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0c95,0ccd,0cb7

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-akhn-kssa-after.svg --features=+akhn, --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0c95,0ccd,0cb7

svg_stack.py --direction=h kannada-akhn-kssa-before.svg right-arrow.svg kannada-akhn-kssa-after.svg > kannada-akhn-kssa.svg


### JNya

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-akhn-jnya-before.svg --features=-akhn,-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0c9c,0ccd,0c9e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-akhn-jnya-after.svg --features=+akhn, --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0c9c,0ccd,0c9e

svg_stack.py --direction=h kannada-akhn-jnya-before.svg right-arrow.svg kannada-akhn-jnya-after.svg > kannada-akhn-jnya.svg


## 3.4 `rphf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-rphf-before.svg --features=-rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0cb0,0ccd,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-rphf-after.svg --features=+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0cb0,0ccd,25cc

svg_stack.py --direction=h kannada-rphf-before.svg right-arrow.svg kannada-rphf-after.svg > kannada-rphf.svg


## 3.7 `blwf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-blwf-before.svg --features=-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=25cc,0ccd,0ca1

hb-view --font-size=110 --margin=2,24,2,16 --output-file=kannada-blwf-after.svg --features=+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=25cc,0ccd,0ca1

svg_stack.py --direction=h kannada-blwf-before.svg right-arrow.svg kannada-blwf-after.svg > kannada-blwf.svg


## 4.3 Reph positioning

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-reph-position-before.svg  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0cb0,0ccd,25cc,0cad,0ccd,0cb3,0cc2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-reph-position-after.svg  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0cb0,0ccd,0cad,0ccd,0cb3,0cc2

svg_stack.py --direction=h kannada-reph-position-before.svg right-arrow.svg kannada-reph-position-after.svg > kannada-reph-position.svg


## 5 `pres`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-pres-before.svg --features=-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0cb5,25cc,0cc1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-pres-after.svg --features=+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0cb5,0cc1

svg_stack.py --direction=h kannada-pres-before.svg right-arrow.svg kannada-pres-after.svg > kannada-pres.svg


## 5 `abvs`

> Note: Noto Serif Kannada has some abvs-like substitutions in
> `psts` lookups for unknown reasons. This is one.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-abvs-before.svg --features=-psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0ca3,0ccd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-abvs-after.svg --features=+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0ca3,0ccd

svg_stack.py --direction=h kannada-abvs-before.svg right-arrow.svg kannada-abvs-after.svg > kannada-abvs.svg


## 5 `blws`

> Note: Noto Serif Kannada has some blws-like substitutions in
> `psts` lookups for unknown reasons. This is one.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-blws-before.svg --features=-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0c95,0ccd,0ca4,0ccd,0caf

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-blws-after.svg --features=+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0c95,0ccd,0ca4,0ccd,0caf

svg_stack.py --direction=h kannada-blws-before.svg right-arrow.svg kannada-blws-after.svg > kannada-blws.svg

## 5 `psts`

> Note: Noto Serif Kannada has moved many `pres` lookups into `psts`
> in the most recent release.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-psts-before.svg --features=-pres,-psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=25cc,0ca4,0cbf

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-psts-after.svg --features=+pres,+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=25cc,0ca4,0cbf

svg_stack.py --direction=h kannada-psts-before.svg right-arrow.svg kannada-psts-after.svg > kannada-psts.svg


## 5 `haln`

> Note: Noto Serif Kannada does not include a `haln` feature. Similar
> behavior is found in `psts`.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-haln-before.svg --features=-haln,-psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0c98,0ccd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-haln-after.svg --features=+haln,+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0c98,0ccd

svg_stack.py --direction=h kannada-haln-before.svg right-arrow.svg kannada-haln-after.svg > kannada-haln.svg


## 6 `dist`

hb-view --font-size=110 --margin=2,32,2,16 --output-file=kannada-dist-before.svg --features=-dist,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0c93,0cf3

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-dist-after.svg --features=+dist --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0c93,0cf3

svg_stack.py --direction=h kannada-dist-before.svg right-arrow.svg kannada-dist-after.svg > kannada-dist.svg


## 6 `abvm`

> Note: Noto Serif Kannada does not include an `abvm` feature.


## 6 `blwm`

hb-view --font-size=110 --margin=2,32,2,16 --output-file=kannada-blwm-before.svg --features=-blwm,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0cab,0cc1,0cbc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=kannada-blwm-after.svg --features=+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKannada-Regular.otf --unicodes=0cab,0cc1,0cbc

svg_stack.py --direction=h kannada-blwm-before.svg right-arrow.svg kannada-blwm-after.svg > kannada-blwm.svg


================================================
FILE: images/khmer/khmer-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-khmer.md](../../opentype-shaping-khmer.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## Terminology

### Coeng forms

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-coeng-kha-before.png --features=-blwf,-pstf --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=17d2,1783

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-coeng-kha-after.png --features=+blwf,+pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=17d2,1783

montage khmer-coeng-kha-before.png right-arrow.png khmer-coeng-kha-after.png -geometry +0+0 -background transparent khmer-coeng-kha.png


## The khmr shaping model

### Robat

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-robat.png --features=+mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=25cc,17cc


## 2.2 Matra decomposition

> Note: Noto Serif Khmer has decompositions for the
> non-canonical-in-Unicode multi-part matras implemented in `psts`
> features, but I have not figured out how to activate them in isolation.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-matra-decomposition-before.png --features=-ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=17c4

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-matra-decomposition-after.png --features=+ccmp,+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=17c1,25cc,17b6

montage khmer-matra-decomposition-before.png right-arrow.png khmer-matra-decomposition-after.png -geometry +0+0 -background transparent khmer-matra-decomposition.png


## 2.4 Pre-base-reordering Ro

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-pref-before.png --features=-pref --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1786,25cc,17d2,179a

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-pref-after.png --features=+pref --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1786,17d2,179a

montage khmer-pref-before.png right-arrow.png khmer-pref-after.png -geometry +0+0 -background transparent khmer-pref.png


## 3.1 `locl`

No examples found in Noto Khmer.


## 3.2 `ccmp`

No examples found in Noto Khmer.


## 3.3 `pref`

Same as pre-base-reordering Ro.


## 3.4 `blwf`

> Note: altered bottom-margin number to fit.

hb-view --font-size=110 --margin=2,16,32,16 --output-file=khmer-blwf-before.png --features=-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=178c,25cc,17d2,17af

hb-view --font-size=110 --margin=2,16,32,16 --output-file=khmer-blwf-after.png --features=+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=178c,17d2,17af

montage khmer-blwf-before.png right-arrow.png khmer-blwf-after.png -geometry +0+0 -background transparent khmer-blwf.png


## 3.5 `abvf`

> Note: Noto Serif Khmer implements this as an `abvs` lookup.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-abvf-before.png --features=-abvf,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=179c,17ca

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-abvf-after.png --features=+abvf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=179c,17ca

montage khmer-abvf-before.png right-arrow.png khmer-abvf-after.png -geometry +0+0 -background transparent khmer-abvf.png

## 3.6 `pstf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-pstf-before.png --features=-pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=25cc,17d2,179f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-pstf-after.png --features=+pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=25cc,17d2,179f

montage khmer-pstf-before.png right-arrow.png khmer-pstf-after.png -geometry +0+0 -background transparent khmer-pstf.png

## 3.7 `cfar`

No examples found in Noto Khmer.


## 4 `pres`

> Note: Adjusted bottom margin.

hb-view --font-size=110 --margin=2,16,32,16 --output-file=khmer-pres-before.png --features=-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1780,17d2,179a,17d2,1781

hb-view --font-size=110 --margin=2,16,32,16 --output-file=khmer-pres-after.png --features=+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1780,17d2,179a,17d2,1781

montage khmer-pres-before.png right-arrow.png khmer-pres-after.png -geometry +0+0 -background transparent khmer-pres.png


## 4 `blws`

hb-view --font-size=110 --margin=2,16,48,16 --output-file=khmer-blws-before.png --features=-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1789,17d2,1780

hb-view --font-size=110 --margin=2,16,48,16 --output-file=khmer-blws-after.png --features=+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1789,17d2,1780

montage khmer-blws-before.png right-arrow.png khmer-blws-after.png -geometry +0+0 -background transparent khmer-blws.png


## 4 `abvs`

hb-view --font-size=110 --margin=32,16,2,16 --output-file=khmer-abvs-before.png --features=-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1796,17b7,17cd

hb-view --font-size=110 --margin=32,16,2,16 --output-file=khmer-abvs-after.png --features=+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1796,17b7,17cd

montage khmer-abvs-before.png right-arrow.png khmer-abvs-after.png -geometry +0+0 -background transparent khmer-abvs.png


## 4 `psts`

> Note: Adjusted bottom margin.

hb-view --font-size=110 --margin=2,16,48,16 --output-file=khmer-psts-before.png --features=-psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1787,17d2,1785,17d2,1788

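hb-view --font-size=110 --margin=2,16,48,16 --output-file=khmer-psts-after.png --features=+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1787,17d2,1785,17d2,1788

montage khmer-psts-before.png right-arrow.png khmer-psts-after.png -geometry +0+0 -background transparent khmer-psts.png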


## 4 `clig`

> Note: Noto Serif Khmer implements this twice, in clig and liga.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-clig-before.png --features=-clig,-liga --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1780,17b6

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-clig-after.png --features=+clig,+liga --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1780,17b6

montage khmer-clig-before.png right-arrow.png khmer-clig-after.png -geometry +0+0 -background transparent khmer-clig.png


## 4 `liga`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-liga-before.png --features=-clig,-liga --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=179c,17d2,1788,17c5

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-liga-after.png --features=+clig,+liga --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=179c,17d2,1788,17c5

montage khmer-liga-before.png right-arrow.png khmer-liga-after.png -geometry +0+0 -background transparent khmer-liga.png


## 5 `dist`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-dist-before.png --features=-dist --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=179e,17d2,1798,179a,17bc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-dist-after.png --features=+dist --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=179e,17d2,1798,179a,17bc

montage khmer-dist-before.png right-arrow.png khmer-dist-after.png -geometry +0+0 -background transparent khmer-dist.png


## 5 `kern`

No examples found in Noto Khmer.


## 5 `blwm`

> Note: adjusted bottom margin.

hb-view --font-size=110 --margin=2,16,48,16 --output-file=khmer-blwm-before.png --features=-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1789,17bc

hb-view --font-size=110 --margin=2,16,48,16 --output-file=khmer-blwm-after.png --features=+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1789,17bc

montage khmer-blwm-before.png right-arrow.png khmer-blwm-after.png -geometry +0+0 -background transparent khmer-blwm.png


## 5 `abvm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-abvm-before.png --features=-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=178e,17b7

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-abvm-after.png --features=+abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=178e,17b7

montage khmer-abvm-before.png right-arrow.png khmer-abvm-after.png -geometry +0+0 -background transparent khmer-abvm.png


## 5 `mkmk`

No examples found in Noto Khmer.


================================================
FILE: images/khmer/khmer-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-khmer.md](../../opentype-shaping-khmer.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## Terminology

### Coeng forms

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-coeng-kha-before.svg --features=-blwf,-pstf --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=17d2,1783

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-coeng-kha-after.svg --features=+blwf,+pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=17d2,1783

svg_stack.py --direction=h khmer-coeng-kha-before.svg right-arrow.svg khmer-coeng-kha-after.svg > khmer-coeng-kha.svg


## The khmr shaping model

### Robat

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-robat.svg --features=+mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=25cc,17cc


## 2.2 Matra decomposition

> Note: Noto Serif Khmer has decompositions for the
> non-canonical-in-Unicode multi-part matras implemented in `psts`
> features, but I have not figured out how to activate them in isolation.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-matra-decomposition-before.svg --features=-ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=17c4

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-matra-decomposition-after.svg --features=+ccmp,+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=17c1,25cc,17b6

svg_stack.py --direction=h khmer-matra-decomposition-before.svg right-arrow.svg khmer-matra-decomposition-after.svg > khmer-matra-decomposition.svg


## 2.4 Pre-base-reordering Ro

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-pref-before.svg --features=-pref --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1786,25cc,17d2,179a

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-pref-after.svg --features=+pref --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1786,17d2,179a

svg_stack.py --direction=h khmer-pref-before.svg right-arrow.svg khmer-pref-after.svg > khmer-pref.svg


#### Duplicates for other subsections

cp khmer-pref.svg khmer-pref-1.svg

cluster_styles = [


## 3.1 `locl`

No examples found in Noto Khmer.


## 3.2 `ccmp`

No examples found in Noto Khmer.


## 3.3 `pref`

Same as pre-base-reordering Ro.


## 3.4 `blwf`

> Note: altered bottom-margin number to fit.

hb-view --font-size=110 --margin=2,16,32,16 --output-file=khmer-blwf-before.svg --features=-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=178c,25cc,17d2,17af

hb-view --font-size=110 --margin=2,16,32,16 --output-file=khmer-blwf-after.svg --features=+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=178c,17d2,17af

svg_stack.py --direction=h khmer-blwf-before.svg right-arrow.svg khmer-blwf-after.svg > khmer-blwf.svg


## 3.5 `abvf`

> Note: Noto Serif Khmer implements this as an `abvs` lookup.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-abvf-before.svg --features=-abvf,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=179c,17ca

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-abvf-after.svg --features=+abvf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=179c,17ca

svg_stack.py --direction=h khmer-abvf-before.svg right-arrow.svg khmer-abvf-after.svg > khmer-abvf.svg

## 3.6 `pstf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-pstf-before.svg --features=-pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=25cc,17d2,179f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-pstf-after.svg --features=+pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=25cc,17d2,179f

svg_stack.py --direction=h khmer-pstf-before.svg right-arrow.svg khmer-pstf-after.svg > khmer-pstf.svg

## 3.7 `cfar`

No examples found in Noto Khmer.


## 4 `pres`

> Note: Adjusted bottom margin.

hb-view --font-size=110 --margin=2,16,32,16 --output-file=khmer-pres-before.svg --features=-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1780,17d2,179a,17d2,1781

hb-view --font-size=110 --margin=2,16,32,16 --output-file=khmer-pres-after.svg --features=+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1780,17d2,179a,17d2,1781

svg_stack.py --direction=h khmer-pres-before.svg right-arrow.svg khmer-pres-after.svg > khmer-pres.svg


## 4 `blws`

hb-view --font-size=110 --margin=2,16,48,16 --output-file=khmer-blws-before.svg --features=-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1789,17d2,1780

hb-view --font-size=110 --margin=2,16,48,16 --output-file=khmer-blws-after.svg --features=+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1789,17d2,1780

svg_stack.py --direction=h khmer-blws-before.svg right-arrow.svg khmer-blws-after.svg > khmer-blws.svg


## 4 `abvs`

hb-view --font-size=110 --margin=32,16,2,16 --output-file=khmer-abvs-before.svg --features=-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1796,17b7,17cd

hb-view --font-size=110 --margin=32,16,2,16 --output-file=khmer-abvs-after.svg --features=+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1796,17b7,17cd

svg_stack.py --direction=h khmer-abvs-before.svg right-arrow.svg khmer-abvs-after.svg > khmer-abvs.svg


## 4 `psts`

> Note: Adjusted bottom margin.

hb-view --font-size=110 --margin=2,16,48,16 --output-file=khmer-psts-before.svg --features=-psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1787,17d2,1785,17d2,1788

hb-view --font-size=110 --margin=2,16,48,16 --output-file=khmer-psts-after.svg --features=+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1787,17d2,1785,17d2,1788

svg_stack.py --direction=h khmer-psts-before.svg right-arrow.svg khmer-psts-after.svg > khmer-psts.svg


## 4 `clig`

> Note: Noto Serif Khmer implements this twice, in clig and liga.
>
> Note: It is no longer possible to deactivate clig in HarfBuzz. See
> the issue at https://github.com/harfbuzz/harfbuzz/issues/1310 for
> more information.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-clig-before.svg --features=-clig,-liga --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1780,17b6

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-clig-after.svg --features=+clig,+liga --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1780,17b6

svg_stack.py --direction=h khmer-clig-before.svg right-arrow.svg khmer-clig-after.svg > khmer-clig.svg


## 4 `liga`

> Note: because Noto Serif Khmer duplicates all of its liga
> substitutions in clig, which cannot be disabled in HarfBuzz (see the
> preceding section about clig), it is not possible to disable the
> liga substitutions either.

hb-view --font-size=110 --margin=2,16,8,24 --output-file=khmer-liga-before.svg --features=-clig,-liga --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=179c,17d2,1788,17c5

hb-view --font-size=110 --margin=2,16,8,24 --output-file=khmer-liga-after.svg --features=+clig,+liga --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=179c,17d2,1788,17c5

svg_stack.py --direction=h khmer-liga-before.svg right-arrow.svg khmer-liga-after.svg > khmer-liga.svg


## 5 `dist`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-dist-before.svg --features=-dist --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=179e,17d2,1798,179a,17bc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-dist-after.svg --features=+dist --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=179e,17d2,1798,179a,17bc

svg_stack.py --direction=h khmer-dist-before.svg right-arrow.svg khmer-dist-after.svg > khmer-dist.svg


## 5 `kern`

No examples found in Noto Khmer.


## 5 `blwm`

> Note: adjusted bottom margin.

hb-view --font-size=110 --margin=2,16,48,16 --output-file=khmer-blwm-before.svg --features=-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1789,17bc

hb-view --font-size=110 --margin=2,16,48,16 --output-file=khmer-blwm-after.svg --features=+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=1789,17bc

svg_stack.py --direction=h khmer-blwm-before.svg right-arrow.svg khmer-blwm-after.svg > khmer-blwm.svg


## 5 `abvm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-abvm-before.svg --features=-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=178e,17b7

hb-view --font-size=110 --margin=2,16,2,16 --output-file=khmer-abvm-after.svg --features=+abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifKhmer-Regular.ttf --unicodes=178e,17b7

svg_stack.py --direction=h khmer-abvm-before.svg right-arrow.svg khmer-abvm-after.svg > khmer-abvm.svg


## 5 `mkmk`

No examples found in Noto Khmer.


================================================
FILE: images/malayalam/malayalam-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-malayalam.md](../../opentype-shaping-malayalam.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-matra-decompose-before.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-matra-decompose-after.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d46,25cc,0d57

montage malayalam-matra-decompose-before.png right-arrow.png malayalam-matra-decompose-after.png -geometry +0+0 -background transparent malayalam-matra-decompose.png


## 2.7 post-base consonants

### Ya

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pstf-ya-before.png --features=-pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4d,0d2f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pstf-ya-after.png --features=+pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4d,0d2f

montage malayalam-pstf-ya-before.png right-arrow.png malayalam-pstf-ya-after.png -geometry +0+0 -background transparent malayalam-pstf-ya.png

### Va

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pstf-va-before.png --features=-pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4d,0d35

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pstf-va-after.png --features=+pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4d,0d35

montage malayalam-pstf-va-before.png right-arrow.png malayalam-pstf-va-after.png -geometry +0+0 -background transparent malayalam-pstf-va.png


## 3.2 `nukt`

> Note: Noto Serif Malayalam uses `U+0323` "Combining dot below" in
> its mark-placement lookups, not a Nukta (which does not exist in the
> Malayalam Unicode block).

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-nukt-before.png --features=-mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d18,25cc,0323

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-nukt-after.png --features=+mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d18,0323

montage malayalam-nukt-before.png right-arrow.png malayalam-nukt-after.png -geometry +0+0 -background transparent malayalam-nukt.png


## 3.3 `akhn`

### KSsa

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-akhn-kssa-before.png --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d15,0d4d,0d37

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-akhn-kssa-after.png --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d15,0d4d,0d37

montage malayalam-akhn-kssa-before.png right-arrow.png malayalam-akhn-kssa-after.png -geometry +0+0 -background transparent malayalam-akhn-kssa.png

### NnTta

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-akhn-nntta-before.png --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d23,0d4d,0d1f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-akhn-nntta-after.png --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d23,0d4d,0d1f

montage malayalam-akhn-nntta-before.png right-arrow.png malayalam-akhn-nntta-after.png -geometry +0+0 -background transparent malayalam-akhn-nntta.png

> Note: The "Chillu R" is shown here because it may be implemented as
> an akhand form and that makes Malayalam distinct from several other
> Indic scripts.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-akhn-chillu-r-before.png --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d30,0d4d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-akhn-chillu-r-after.png --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d7c

montage malayalam-akhn-chillu-r-before.png right-arrow.png malayalam-akhn-chillu-r-after.png -geometry +0+0 -background transparent malayalam-akhn-chillu-r.png


## 3.4 `rphf`

> Note: Malayalam modern orthography does not use Reph. The dot-reph
> substitution here is shown with an accompanying note to that effect,
> and is accompanied by the Chillu-R image.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-dot-reph-before.png --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d30,0d4d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-dot-reph-after.png --features=+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4e

montage malayalam-dot-reph-before.png right-arrow.png malayalam-dot-reph-after.png -geometry +0+0 -background transparent malayalam-dot-reph.png


## 3.6 `pref`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pstf-ra-before.png --features=-pref --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4d,0d30

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pstf-ra-after.png --features=+pref --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4d,0d30

montage malayalam-pstf-ra-before.png right-arrow.png malayalam-pstf-ra-after.png -geometry +0+0 -background transparent malayalam-pstf-ra.png


## 3.7 `blwf`

> Note: Noto Serif Malayalam includes a `blwf`-form "La" but does not
> include a feature that accesses it. It is included in several `akhn`
> ligatures, though. Instead, use SMC Rachana font.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-blwf-before.png --features=-blwf --background=FFFFFF00 /usr/share/fonts/truetype/malayalam/Rachana-Regular.ttf --unicodes=0d4d,0d32

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-blwf-after.png --features=+blwf --background=FFFFFF00 /usr/share/fonts/truetype/malayalam/Rachana-Regular.ttf --unicodes=0d4d,0d32

montage malayalam-blwf-before.png right-arrow.png malayalam-blwf-after.png -geometry +0+0 -background transparent malayalam-blwf.png

## 3.9 `half`

> Note: Added a note to the shaping text about using `half` for Chillu
> lookups.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-half-before.png --features=+half --background=FFFFFF00 --preserve-default-ignorables /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Malayalam/static/NotoSerifMalayalam-Regular.ttf --unicodes=0d15,0d4d,2005,200d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-half-after.png --features=+half --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Malayalam/static/NotoSerifMalayalam-Regular.ttf --unicodes=0d15,0d4d,200d

montage malayalam-half-before.png right-arrow.png malayalam-half-after.png -geometry +0+0 -background transparent malayalam-half.png


## 3.10 `pstf`

> Note: Uses the same images as 2.7

## 3.12 `cjct`

> Note: Noto Serif Malayalam implements this as an `akhn` feature.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-cjct-before.png --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d38,0d4d,0d31,0d4d,0d31

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-cjct-after.png --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d38,0d4d,0d31,0d4d,0d31

montage malayalam-cjct-before.png right-arrow.png malayalam-cjct-after.png -geometry +0+0 -background transparent malayalam-cjct.png


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-matra-position-before.png --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d47,0d2c,0d4d,0d1e,0d4d,0d1c,0d3e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-matra-position-after.png --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d2c,0d4d,0d1e,0d4d,0d1c,0d4b

montage malayalam-matra-position-before.png right-arrow.png malayalam-matra-position-after.png -geometry +0+0 -background transparent malayalam-matra-position.png


## 4.3 Reph position

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-repha-position-before.png --features=+akhn,-abvm,-mark --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Malayalam/static/NotoSerifMalayalam-Regular.ttf --unicodes=0d4e,200d,0d23,0d4d,200d,0d21,0d41

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-repha-position-after.png --features=+akhn,+abvm,+mark --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Malayalam/static/NotoSerifMalayalam-Regular.ttf --unicodes=0d4e,0d23,0d4d,200d,0d21,0d41

montage malayalam-repha-position-before.png right-arrow.png malayalam-repha-position-after.png -geometry +0+0 -background transparent malayalam-repha-position.png


## 4.4 Pre-base reordering

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pref-position-before.png --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=25cc,0d4d,0d30,0d39,0d4d,0d23,0d4d,0d21,0d4c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pref-position-after.png --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d39,0d4d,0d23,0d4d,0d21,0d4d,0d30,0d4c

montage malayalam-pref-position-before.png right-arrow.png malayalam-pref-position-after.png -geometry +0+0 -background transparent malayalam-pref-position.png


## 5 `blws`

> Note: Noto Serif and Sans Malayalam have blws-like "La" features in
> other lookups, such as `akhn`. I have not been able to isolate one
> of them for usage.


## 5 `psts`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-psts-before.png --features=-psts,-akhn --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Malayalam/static/NotoSerifMalayalam-Regular.ttf --unicodes=0d35,0d4d,0d35

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-psts-after.png --features=+psts,+akhn --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Malayalam/static/NotoSerifMalayalam-Regular.ttf --unicodes=0d35,0d4d,0d35

montage malayalam-psts-before.png right-arrow.png malayalam-psts-after.png -geometry +0+0 -background transparent malayalam-psts.png

## 5 `haln`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-haln-before.png --features=-haln --background=FFFFFF00 --preserve-default-ignorables /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Malayalam/static/NotoSerifMalayalam-Regular.ttf --unicodes=0d33,0d4d,2005,200d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-haln-after.png --features=+haln --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Malayalam/static/NotoSerifMalayalam-Regular.ttf --unicodes=0d33,0d4d,200d

montage malayalam-haln-before.png right-arrow.png malayalam-haln-after.png -geometry +0+0 -background transparent malayalam-haln.png


## 6 `abvm`

hb-view --font-size=110 --margin=2,32,2,16 --output-file=malayalam-abvm-before.png --features=-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d0a,0d01

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-abvm-after.png --features=+abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d0a,0d01

montage malayalam-abvm-before.png right-arrow.png malayalam-abvm-after.png -geometry +0+0 -background transparent malayalam-abvm.png


## 6 `blwm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-blwm-before.png --features=-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d34,0d62

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-blwm-after.png --features=+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d34,0d62

montage malayalam-blwm-before.png right-arrow.png malayalam-blwm-after.png -geometry +0+0 -background transparent malayalam-blwm.png


================================================
FILE: images/malayalam/malayalam-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-malayalam.md](../../opentype-shaping-malayalam.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-matra-decompose-before.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-matra-decompose-after.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d46,25cc,0d57

svg_stack.py --direction=h malayalam-matra-decompose-before.svg right-arrow.svg malayalam-matra-decompose-after.svg > malayalam-matra-decompose.svg


## 2.7 post-base consonants

### Ya

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pstf-ya-before.svg --features=-pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4d,0d2f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pstf-ya-after.svg --features=+pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4d,0d2f

svg_stack.py --direction=h malayalam-pstf-ya-before.svg right-arrow.svg malayalam-pstf-ya-after.svg > malayalam-pstf-ya.svg

### Va

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pstf-va-before.svg --features=-pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4d,0d35

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pstf-va-after.svg --features=+pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4d,0d35

svg_stack.py --direction=h malayalam-pstf-va-before.svg right-arrow.svg malayalam-pstf-va-after.svg > malayalam-pstf-va.svg


#### Duplicates for other subsections

cp malayalam-pstf-ya.svg malayalam-pstf-ya-1.svg

cluster_styles = [


cp malayalam-pstf-va.svg malayalam-pstf-va-1.svg

cluster_styles = [


## 3.2 `nukt`

> Note: Noto Serif Malayalam uses `U+0323` "Combining dot below" in
> its mark-placement lookups, not a Nukta (which does not exist in the
> Malayalam Unicode block).

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-nukt-before.svg --features=-mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d18,25cc,0323

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-nukt-after.svg --features=+mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d18,0323

svg_stack.py --direction=h malayalam-nukt-before.svg right-arrow.svg malayalam-nukt-after.svg > malayalam-nukt.svg


## 3.3 `akhn`

### KSsa

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-akhn-kssa-before.svg --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d15,0d4d,0d37

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-akhn-kssa-after.svg --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d15,0d4d,0d37

svg_stack.py --direction=h malayalam-akhn-kssa-before.svg right-arrow.svg malayalam-akhn-kssa-after.svg > malayalam-akhn-kssa.svg

### NnTta

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-akhn-nntta-before.svg --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d23,0d4d,0d1f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-akhn-nntta-after.svg --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d23,0d4d,0d1f

svg_stack.py --direction=h malayalam-akhn-nntta-before.svg right-arrow.svg malayalam-akhn-nntta-after.svg > malayalam-akhn-nntta.svg

> Note: The "Chillu R" is shown here because it may be implemented as
> an akhand form and that makes Malayalam distinct from several other
> Indic scripts.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-akhn-chillu-r-before.svg --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d30,0d4d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-akhn-chillu-r-after.svg --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d7c

svg_stack.py --direction=h malayalam-akhn-chillu-r-before.svg right-arrow.svg malayalam-akhn-chillu-r-after.svg > malayalam-akhn-chillu-r.svg


## 3.4 `rphf`

> Note: Malayalam modern orthography does not use Reph. The dot-reph
> substitution here is shown with an accompanying note to that effect,
> and is accompanied by the Chillu-R image.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-dot-reph-before.svg --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d30,0d4d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-dot-reph-after.svg --features=+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4e

svg_stack.py --direction=h malayalam-dot-reph-before.svg right-arrow.svg malayalam-dot-reph-after.svg > malayalam-dot-reph.svg


## 3.6 `pref`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pstf-ra-before.svg --features=-pref --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4d,0d30

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pstf-ra-after.svg --features=+pref --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4d,0d30

svg_stack.py --direction=h malayalam-pstf-ra-before.svg right-arrow.svg malayalam-pstf-ra-after.svg > malayalam-pstf-ra.svg


## 3.7 `blwf`

> Note: Noto Serif Malayalam includes a `blwf`-form "La" but does not
> include a feature that accesses it. It is included in several `akhn`
> ligatures, though. Instead, use SMC Rachana font.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-blwf-before.svg --features=-blwf --background=FFFFFF00 /usr/share/fonts/truetype/malayalam/Rachana-Regular.ttf --unicodes=0d4d,0d32

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-blwf-after.svg --features=+blwf --background=FFFFFF00 /usr/share/fonts/truetype/malayalam/Rachana-Regular.ttf --unicodes=0d4d,0d32

svg_stack.py --direction=h malayalam-blwf-before.svg right-arrow.svg malayalam-blwf-after.svg > malayalam-blwf.svg


#### Duplicates for other subsections

cp malayalam-blwf.svg malayalam-blwf-1.svg

cluster_styles = [


## 3.9 `half`

> Note: Added a note to the shaping text about using `half` for Chillu
> lookups.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-half-before.svg --features=+half --background=FFFFFF00 --preserve-default-ignorables /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d15,0d4d,2005,200d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-half-after.svg --features=+half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d15,0d4d,200d

svg_stack.py --direction=h malayalam-half-before.svg right-arrow.svg malayalam-half-after.svg > malayalam-half.svg


## 3.10 `pstf`

> Note: Uses the same images as 2.7

## 3.12 `cjct`

> Note: Noto Serif Malayalam implements this as an `akhn` feature.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-cjct-before.svg --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d38,0d4d,0d31,0d4d,0d31

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-cjct-after.svg --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d38,0d4d,0d31,0d4d,0d31

svg_stack.py --direction=h malayalam-cjct-before.svg right-arrow.svg malayalam-cjct-after.svg > malayalam-cjct.svg


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-matra-position-before.svg --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d47,0d2c,0d4d,0d1e,0d4d,0d1c,0d3e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-matra-position-after.svg --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d2c,0d4d,0d1e,0d4d,0d1c,0d4b

svg_stack.py --direction=h malayalam-matra-position-before.svg right-arrow.svg malayalam-matra-position-after.svg > malayalam-matra-position.svg


## 4.3 Reph position

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-repha-position-before.svg --features=+akhn,-abvm,-mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4e,200d,0d23,0d4d,200d,0d21,0d41

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-repha-position-after.svg --features=+akhn,+abvm,+mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d4e,0d23,0d4d,200d,0d21,0d41

svg_stack.py --direction=h malayalam-repha-position-before.svg right-arrow.svg malayalam-repha-position-after.svg > malayalam-repha-position.svg


## 4.4 Pre-base reordering

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pref-position-before.svg --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=25cc,0d4d,0d30,0d39,0d4d,0d23,0d4d,0d21,0d4c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-pref-position-after.svg --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d39,0d4d,0d23,0d4d,0d21,0d4d,0d30,0d4c

svg_stack.py --direction=h malayalam-pref-position-before.svg right-arrow.svg malayalam-pref-position-after.svg > malayalam-pref-position.svg


## 5 `blws`

> Note: Noto Serif and Sans Malayalam have blws-like "La" features in
> other lookups, such as `akhn`. I have not been able to isolate one
> of them for usage.


## 5 `psts`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-psts-before.svg --features=-psts,-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d35,0d4d,0d35

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-psts-after.svg --features=+psts,+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d35,0d4d,0d35

svg_stack.py --direction=h malayalam-psts-before.svg right-arrow.svg malayalam-psts-after.svg > malayalam-psts.svg

## 5 `haln`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-haln-before.svg --features=-haln --background=FFFFFF00 --preserve-default-ignorables /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d33,0d4d,2005,200d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-haln-after.svg --features=+haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d33,0d4d,200d

svg_stack.py --direction=h malayalam-haln-before.svg right-arrow.svg malayalam-haln-after.svg > malayalam-haln.svg


## 6 `abvm`

hb-view --font-size=110 --margin=32,16,2,16 --output-file=malayalam-abvm-before.svg --features=-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d0a,0d01

hb-view --font-size=110 --margin=32,16,2,16 --output-file=malayalam-abvm-after.svg --features=+abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d0a,0d01

svg_stack.py --direction=h malayalam-abvm-before.svg right-arrow.svg malayalam-abvm-after.svg > malayalam-abvm.svg


## 6 `blwm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-blwm-before.svg --features=-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d34,0d62

hb-view --font-size=110 --margin=2,16,2,16 --output-file=malayalam-blwm-after.svg --features=+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifMalayalam-Regular.ttf --unicodes=0d34,0d62

svg_stack.py --direction=h malayalam-blwm-before.svg right-arrow.svg malayalam-blwm-after.svg > malayalam-blwm.svg


================================================
FILE: images/mongolian/mongolian-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-mongolian.md](../../opentype-shaping-mongolian.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## Terminology

### FVS

#### No FVS

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-none-before.png --features=-medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180d,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-none-after.png --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180a,25cc

montage mongolian-fvs-none-before.png right-arrow.png mongolian-fvs-none-after.png -geometry +0+0 -background transparent mongolian-fvs-none.png


#### FVS1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-fvs1-before.png --features=-medi --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180b,200d,0020,0020,0020,202f,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-fvs1-after.png --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180b,180a,25cc

montage mongolian-fvs-fvs1-before.png right-arrow.png mongolian-fvs-fvs1-after.png -geometry +0+0 -background transparent mongolian-fvs-fvs1.png


#### FVS2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-fvs2-before.png --features=-medi --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180c,200d,0020,0020,0020,202f,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-fvs2-after.png --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180c,180a,25cc

montage mongolian-fvs-fvs2-before.png right-arrow.png mongolian-fvs-fvs2-after.png -geometry +0+0 -background transparent mongolian-fvs-fvs2.png


#### FVS3

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-fvs3-before.png --features=-medi --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180d,200d,0020,0020,0020,202f,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-fvs3-after.png --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180d,180a,25cc

montage mongolian-fvs-fvs3-before.png right-arrow.png mongolian-fvs-fvs3-after.png -geometry +0+0 -background transparent mongolian-fvs-fvs3.png


## 4.2 `isol`

### `isol` general

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-isol-before.png --features=-isol --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1826

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-isol-after.png --features=+isol --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1826

montage mongolian-isol-before.png right-arrow.png mongolian-isol-after.png -geometry +0+0 -background transparent mongolian-isol.png


### `isol` FVS

> Note: uses larger right margin

hb-view --font-size=110 --margin=2,112,2,16 --output-file=mongolian-isol-fvs1-before.png --features=-isol --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1826,180b

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-isol-fvs1-after.png --features=+isol --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1826,180b

montage mongolian-isol-fvs1-before.png right-arrow.png mongolian-isol-fvs1-after.png -geometry +0+0 -background transparent mongolian-isol-fvs1.png 


## 4.3 `fina`

### `fina` general

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fina-before.png --features=-fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1830

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fina-after.png --features=+fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1830

montage mongolian-fina-before.png right-arrow.png mongolian-fina-after.png -geometry +0+0 -background transparent mongolian-fina.png


### `fina` FVS

> Note: uses larger right margin

hb-view --font-size=110 --margin=2,112,2,16 --output-file=mongolian-fina-fvs2-before.png --features=-fina --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1830,180c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fina-fvs2-after.png --features=+fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1830,180c

montage mongolian-fina-fvs2-before.png right-arrow.png mongolian-fina-fvs2-after.png -geometry +0+0 -background transparent mongolian-fina-fvs2.png


## 4.6 `medi`

### `medi` general

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-medi-before.png --features=-medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,186f,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-medi-after.png --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,186f,180a,25cc

montage mongolian-medi-before.png right-arrow.png mongolian-medi-after.png -geometry +0+0 -background transparent mongolian-medi.png


### `medi` FVS

> Note: uses ZWJ (U+200D) and spaces to approximate correct spacing
> for FVS1 (which is zero-width)

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-medi-fvs1-before.png --features=-medi --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,186f,180b,200d,0020,0020,0020,202f,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-medi-fvs1-after.png --features=+medi --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,186f,180b,180a,25cc

montage mongolian-medi-fvs1-before.png right-arrow.png mongolian-medi-fvs1-after.png -geometry +0+0 -background transparent mongolian-medi-fvs1.png


## 4.8 `init`

### `init` general

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-init-before.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1821,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-init-after.png --features=+init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1821,180a,25cc

montage mongolian-init-before.png right-arrow.png mongolian-init-after.png -geometry +0+0 -background transparent mongolian-init.png


### `init` FVS

> Note: uses ZWJ (U+200D) and spaces to approximate correct spacing
> for FVS1 (which is zero-width)

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-init-fvs1-before.png --features=-init --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1821,180b,200d,0020,0020,0020,202f,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-init-fvs1-after.png --features=+init --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1821,180b,180a,25cc

montage mongolian-init-fvs1-before.png right-arrow.png mongolian-init-fvs1-after.png -geometry +0+0 -background transparent mongolian-init-fvs1.png


## 4.9 `rlig`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-rlig-before.png --features=-rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=182a,1820

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-rlig-after.png --features=+rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=182a,1820

montage mongolian-rlig-before.png right-arrow.png mongolian-rlig-after.png -geometry +0+0 -background transparent mongolian-rlig.png


================================================
FILE: images/mongolian/mongolian-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-mongolian.md](../../opentype-shaping-mongolian.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## Terminology

### FVS

#### No FVS

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-none-before.svg --features=-medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180d,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-none-after.svg --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180a,25cc

svg_stack --direction=h mongolian-fvs-none-before.svg right-arrow.svg mongolian-fvs-none-after.svg > mongolian-fvs-none.svg


#### FVS1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-fvs1-before.svg --features=-medi --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180b,200d,0020,0020,0020,202f,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-fvs1-after.svg --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180b,180a,25cc

svg_stack --direction=h mongolian-fvs-fvs1-before.svg right-arrow.svg mongolian-fvs-fvs1-after.svg > mongolian-fvs-fvs1.svg


#### FVS2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-fvs2-before.svg --features=-medi --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180c,200d,0020,0020,0020,202f,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-fvs2-after.svg --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180c,180a,25cc

svg_stack --direction=h mongolian-fvs-fvs2-before.svg right-arrow.svg mongolian-fvs-fvs2-after.svg > mongolian-fvs-fvs2.svg


#### FVS3

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-fvs3-before.svg --features=-medi --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180d,200d,0020,0020,0020,202f,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fvs-fvs3-after.svg --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1873,180d,180a,25cc

svg_stack --direction=h mongolian-fvs-fvs3-before.svg right-arrow.svg mongolian-fvs-fvs3-after.svg > mongolian-fvs-fvs3.svg


## 4.2 `isol`

### `isol` general

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-isol-before.svg --features=-isol --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1826

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-isol-after.svg --features=+isol --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1826

svg_stack --direction=h mongolian-isol-before.svg right-arrow.svg mongolian-isol-after.svg > mongolian-isol.svg


### `isol` FVS

> Note: uses larger right margin

hb-view --font-size=110 --margin=2,112,2,16 --output-file=mongolian-isol-fvs1-before.svg --features=-isol --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1826,180b

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-isol-fvs1-after.svg --features=+isol --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1826,180b

svg_stack --direction=h mongolian-isol-fvs1-before.svg right-arrow.svg mongolian-isol-fvs1-after.svg > mongolian-isol-fvs1.svg 


## 4.3 `fina`

### `fina` general

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fina-before.svg --features=-fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1830

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fina-after.svg --features=+fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1830

svg_stack --direction=h mongolian-fina-before.svg right-arrow.svg mongolian-fina-after.svg > mongolian-fina.svg


### `fina` FVS

> Note: uses larger right margin

hb-view --font-size=110 --margin=2,112,2,16 --output-file=mongolian-fina-fvs2-before.svg --features=-fina --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1830,180c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-fina-fvs2-after.svg --features=+fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,1830,180c

svg_stack --direction=h mongolian-fina-fvs2-before.svg right-arrow.svg mongolian-fina-fvs2-after.svg > mongolian-fina-fvs2.svg


## 4.6 `medi`

### `medi` general

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-medi-before.svg --features=-medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,186f,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-medi-after.svg --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,186f,180a,25cc

svg_stack --direction=h mongolian-medi-before.svg right-arrow.svg mongolian-medi-after.svg > mongolian-medi.svg


### `medi` FVS

> Note: uses ZWJ (U+200D) and spaces to approximate correct spacing
> for FVS1 (which is zero-width)

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-medi-fvs1-before.svg --features=-medi --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,186f,180b,200d,0020,0020,0020,202f,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-medi-fvs1-after.svg --features=+medi --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=25cc,180a,186f,180b,180a,25cc

svg_stack --direction=h mongolian-medi-fvs1-before.svg right-arrow.svg mongolian-medi-fvs1-after.svg > mongolian-medi-fvs1.svg


## 4.8 `init`

### `init` general

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-init-before.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1821,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-init-after.svg --features=+init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1821,180a,25cc

svg_stack --direction=h mongolian-init-before.svg right-arrow.svg mongolian-init-after.svg > mongolian-init.svg


### `init` FVS

> Note: uses ZWJ (U+200D) and spaces to approximate correct spacing
> for FVS1 (which is zero-width)

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-init-fvs1-before.svg --features=-init --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1821,180b,200d,0020,0020,0020,202f,180a,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-init-fvs1-after.svg --features=+init --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=1821,180b,180a,25cc

svg_stack --direction=h mongolian-init-fvs1-before.svg right-arrow.svg mongolian-init-fvs1-after.svg > mongolian-init-fvs1.svg


## 4.9 `rlig`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-rlig-before.svg --features=-rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=182a,1820

hb-view --font-size=110 --margin=2,16,2,16 --output-file=mongolian-rlig-after.svg --features=+rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMongolian-Regular.ttf --unicodes=182a,1820

svg_stack --direction=h mongolian-rlig-before.svg right-arrow.svg mongolian-rlig-after.svg > mongolian-rlig.svg


================================================
FILE: images/myanmar/myanmar-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-myanmar.md](../../opentype-shaping-myanmar.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## Terminology

### Variation Selector

#### No VS

> Note: SIL Padauk 3.x implements the dotted-form feature, but not
> using Variation Selectors, for unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-dotted-before.png --features=+psts --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=1022

#### VS dotted form

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-dotted-after.png --features=+psts --preserve-default-ignorables --language=KHT --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=1022,fe00


montage myanmar-dotted-before.png right-arrow.png myanmar-dotted-after.png -geometry +0+0 -background transparent myanmar-dotted.png


## 1. Kinzi

> Note: Noto Sans Myanmar does not implement the `rphf` feature for
> unknown reasons.

### Ra

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-kinzi-ra-before.png --features=-rphf,-abvs --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=101b,200c,103a,1039,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-kinzi-ra-after.png --features=+rphf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=101b,103a,1039,25cc

montage myanmar-kinzi-ra-before.png right-arrow.png myanmar-kinzi-ra-after.png -geometry +0+0 -background transparent myanmar-kinzi-ra.png


### Nga

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-kinzi-nga-before.png --features=-rphf,-abvs --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=1004,200c,103a,1039,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-kinzi-nga-after.png --features=+rphf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=1004,103a,1039,25cc

montage myanmar-kinzi-nga-before.png right-arrow.png myanmar-kinzi-nga-after.png -geometry +0+0 -background transparent myanmar-kinzi-nga.png


### Mon Nga

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-kinzi-monnga-before.png --features=-rphf,-abvs --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=105a,200c,103a,1039,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-kinzi-monnga-after.png --features=+rphf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=105a,103a,1039,25cc

montage myanmar-kinzi-monnga-before.png right-arrow.png myanmar-kinzi-monnga-after.png -geometry +0+0 -background transparent myanmar-kinzi-monnga.png


## 2.4 Medial Ra

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-medial-ra-before.png --features=+psts --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=1017,200D,103C

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-medial-ra-after.png --features=+psts --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=1017,103C

montage myanmar-medial-ra-before.png right-arrow.png myanmar-medial-ra-after.png -geometry +0+0 -background transparent myanmar-medial-ra.png


## 3.1 locl

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-locl-before.png --features=+psts --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=100f,103d,103e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-locl-after.png --features=+psts --language=KSW --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=100f,103d,103e

montage myanmar-locl-before.png right-arrow.png myanmar-locl-after.png -geometry +0+0 -background transparent myanmar-locl.png


## 3.3 rphf

> Same as Kinzi


## 3.4 pref

> Note: Noto Sans Myanmar does not implement any pref features for
> unknown reasons. This example shows a basic-shaping feature to
> distinguish pref from the more stylistic applications of pres.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-pref-before.png --features=-pref  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=103c,103f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-pref-after.png --features=+pref  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=103f,103c

montage myanmar-pref-before.png right-arrow.png myanmar-pref-after.png -geometry +0+0 -background transparent myanmar-pref.png


## 3.5 blwf

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-blwf-before.png --features=-blwf  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=100f,1039,100a

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-blwf-after.png --features=+blwf  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=100f,1039,100a

montage myanmar-blwf-before.png right-arrow.png myanmar-blwf-after.png -geometry +0+0 -background transparent myanmar-blwf.png


## 3.6 pstf

> Note: Noto Sans Myanmar does not include a pstf feature for unknown
> reasons. This example shows an orthographically-selected variant, as
> referred to on
> https://r12a.github.io/scripts/myanmar/block#charTALL%20AA to
> distinguish pstf as an initial-shaping feature from the more
> stylistic applications of psts.
>
> Note: The example linked to above is used in the Microsoft
> script-development spec for Myanmar:
> https://docs.microsoft.com/en-us/typography/script-development/myanmar#feature-tag-pstf 
> but this usage is not well-attested in real-world Myanmar
> fonts. Instead, the "Aa"/"Tall Aa" distinction is made at the
> encoding level and is expected to happen during text
> entry. Consequently, this image has been removed from the
> script-specific shaping document. See issue #85 for the discussion.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-pstf-before.png --features=-pstf  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=101d,102c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-pstf-after.png --features=+pstf  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=101d,102b

montage myanmar-pstf-before.png right-arrow.png myanmar-pstf-after.png -geometry +0+0 -background transparent myanmar-pstf.png


## 4 pres 

> Note: Noto Sans Myanmar does not implement this as a pres feature.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-pres-before.png --features=-pres,-blws --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=100c,103c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-pres-after.png --features=+pres,+blws --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=100c,103c

montage myanmar-pres-before.png right-arrow.png myanmar-pres-after.png -geometry +0+0 -background transparent myanmar-pres.png


## 4 abvs

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-abvs-before.png --features=-abvs --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=100b,102d,1032

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-abvs-after.png --features=+abvs --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=100b,102d,1032

montage myanmar-abvs-before.png right-arrow.png myanmar-abvs-after.png -geometry +0+0 -background transparent myanmar-abvs.png


## 4 blws

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-blws-before.png --features=-blws  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=aa6b,103c,103e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-blws-after.png --features=+blws  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=aa6b,103c,103e

montage myanmar-blws-before.png right-arrow.png myanmar-blws-after.png -geometry +0+0 -background transparent myanmar-blws.png


## 4 psts

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-psts-before.png --features=-blws  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=100b,103b

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-psts-after.png --features=+blws  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=100b,103b

montage myanmar-psts-before.png right-arrow.png myanmar-psts-after.png -geometry +0+0 -background transparent myanmar-psts.png


## 4 liga

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-liga-before.png --features=-liga,-blws,-blwf  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=1016,103c,103d,103e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-liga-after.png --features=+liga  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=1016,103c,103d,103e

montage myanmar-liga-before.png right-arrow.png myanmar-liga-after.png -geometry +0+0 -background transparent myanmar-liga.png


## 5 dist

> Note: Noto Sans Myanmar implements all distance adjustments in
> `kern`.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-dist-before.png --features=-kern  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=101b,102b,103a,100f,103c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-dist-after.png --features=+kern  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=101b,102b,103a,100f,103c

montage myanmar-dist-before.png right-arrow.png myanmar-dist-after.png -geometry +0+0 -background transparent myanmar-dist.png


## 5 abvm

> Note: Noto Sans Myanmar implements this as `mark`.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-abvm-before.png --features=-mark  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=1004,103a,1039,1008

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-abvm-after.png --features=+mark  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=1004,103a,1039,1008

montage myanmar-abvm-before.png right-arrow.png myanmar-abvm-after.png -geometry +0+0 -background transparent myanmar-abvm.png


## 5 blwm

> Note: Noto Sans Myanmar implements this as `mark`.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-blwm-before.png --features=-mark  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=1009,1039,101b

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-blwm-after.png --features=+mark  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=1009,1039,101b

montage myanmar-blwm-before.png right-arrow.png myanmar-blwm-after.png -geometry +0+0 -background transparent myanmar-blwm.png


## 5 mark

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-mark-before.png --features=-mark  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=107e,108d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-mark-after.png --features=+mark  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=107e,108d

montage myanmar-mark-before.png right-arrow.png myanmar-mark-after.png -geometry +0+0 -background transparent myanmar-mark.png


## 5 mkmk

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-mkmk-before.png --features=-mkmk  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=1000,1039,105d,105e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-mkmk-after.png --features=+mkmk  --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/Noto_Serif_Myanmar/NotoSerifMyanmar-Regular.ttf --unicodes=1000,1039,105d,105e

montage myanmar-mkmk-before.png right-arrow.png myanmar-mkmk-after.png -geometry +0+0 -background transparent myanmar-mkmk.png


================================================
FILE: images/myanmar/myanmar-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-myanmar.md](../../opentype-shaping-myanmar.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## Terminology

### Variation Selector

#### No VS

> Note: SIL Padauk 3.x implements the dotted-form feature, but not
> using Variation Selectors, for unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-dotted-before.svg --features=+psts --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/padauk-3.003/PadaukBook-Regular.ttf --unicodes=1022

#### VS dotted form

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-dotted-after.svg --features=+psts --preserve-default-ignorables --language=KHT --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/padauk-3.003/PadaukBook-Regular.ttf --unicodes=1022,fe00


svg_stack --direction=h myanmar-dotted-before.svg right-arrow.svg myanmar-dotted-after.svg > myanmar-dotted.svg


## 1. Kinzi

> Note: Noto Sans Myanmar does not implement the `rphf` feature for
> unknown reasons.

### Ra

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-kinzi-ra-before.svg --features=-rphf,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=101b,200c,103a,1039,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-kinzi-ra-after.svg --features=+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=101b,103a,1039,25cc

svg_stack --direction=h myanmar-kinzi-ra-before.svg right-arrow.svg myanmar-kinzi-ra-after.svg > myanmar-kinzi-ra.svg


### Nga

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-kinzi-nga-before.svg --features=-rphf,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=1004,200c,103a,1039,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-kinzi-nga-after.svg --features=+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=1004,103a,1039,25cc

svg_stack --direction=h myanmar-kinzi-nga-before.svg right-arrow.svg myanmar-kinzi-nga-after.svg > myanmar-kinzi-nga.svg


### Mon Nga

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-kinzi-monnga-before.svg --features=-rphf,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=105a,200c,103a,1039,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-kinzi-monnga-after.svg --features=+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=105a,103a,1039,25cc

svg_stack --direction=h myanmar-kinzi-monnga-before.svg right-arrow.svg myanmar-kinzi-monnga-after.svg > myanmar-kinzi-monnga.svg


## 2.4 Medial Ra

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-medial-ra-before.svg --features=+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=1017,200D,103C

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-medial-ra-after.svg --features=+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=1017,103C

svg_stack --direction=h myanmar-medial-ra-before.svg right-arrow.svg myanmar-medial-ra-after.svg > myanmar-medial-ra.svg


## 3.1 locl

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-locl-before.svg --features=+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=100f,103d,103e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-locl-after.svg --features=+psts --language=KSW --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=100f,103d,103e

svg_stack --direction=h myanmar-locl-before.svg right-arrow.svg myanmar-locl-after.svg > myanmar-locl.svg


## 3.3 rphf

> Same as Kinzi


## 3.4 pref

> Note: Noto Sans Myanmar does not implement any pref features for
> unknown reasons. This example shows a basic-shaping feature to
> distinguish pref from the more stylistic applications of pres.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-pref-before.svg --features=-pref  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=103c,103f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-pref-after.svg --features=+pref  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=103f,103c

svg_stack --direction=h myanmar-pref-before.svg right-arrow.svg myanmar-pref-after.svg > myanmar-pref.svg


## 3.5 blwf

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-blwf-before.svg --features=-blwf  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=100f,1039,100a

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-blwf-after.svg --features=+blwf  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=100f,1039,100a

svg_stack --direction=h myanmar-blwf-before.svg right-arrow.svg myanmar-blwf-after.svg > myanmar-blwf.svg


## 3.6 pstf

> Note: Noto Sans Myanmar does not include a pstf feature for unknown
> reasons. This example shows an orthographically-selected variant, as
> referred to on
> https://r12a.github.io/scripts/myanmar/block#charTALL%20AA to
> distinguish pstf as an initial-shaping feature from the more
> stylistic applications of psts.
>
> Note: The example linked to above is used in the Microsoft
> script-development spec for Myanmar:
> https://docs.microsoft.com/en-us/typography/script-development/myanmar#feature-tag-pstf 
> but this usage is not well-attested in real-world Myanmar
> fonts. Instead, the "Aa"/"Tall Aa" distinction is made at the
> encoding level and is expected to happen during text
> entry. Consequently, this image has been removed from the
> script-specific shaping document. See issue #85 for the discussion.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-pstf-before.svg --features=-pstf  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=101d,102c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-pstf-after.svg --features=+pstf  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=101d,102b

svg_stack --direction=h myanmar-pstf-before.svg right-arrow.svg myanmar-pstf-after.svg > myanmar-pstf.svg


## 4 pres 

> Note: Noto Sans Myanmar does not implement this as a pres feature.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-pres-before.svg --features=-pres,-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=100c,103c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-pres-after.svg --features=+pres,+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=100c,103c

svg_stack --direction=h myanmar-pres-before.svg right-arrow.svg myanmar-pres-after.svg > myanmar-pres.svg


## 4 abvs

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-abvs-before.svg --features=-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=100b,102d,1032

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-abvs-after.svg --features=+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=100b,102d,1032

svg_stack --direction=h myanmar-abvs-before.svg right-arrow.svg myanmar-abvs-after.svg > myanmar-abvs.svg


## 4 blws

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-blws-before.svg --features=-blws  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=aa6b,103c,103e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-blws-after.svg --features=+blws  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=aa6b,103c,103e

svg_stack --direction=h myanmar-blws-before.svg right-arrow.svg myanmar-blws-after.svg > myanmar-blws.svg


## 4 psts

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-psts-before.svg --features=-blws  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=100b,103b

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-psts-after.svg --features=+blws  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=100b,103b

svg_stack --direction=h myanmar-psts-before.svg right-arrow.svg myanmar-psts-after.svg > myanmar-psts.svg


## 4 liga

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-liga-before.svg --features=-liga,-blws,-blwf  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=1016,103c,103d,103e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-liga-after.svg --features=+liga  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=1016,103c,103d,103e

svg_stack --direction=h myanmar-liga-before.svg right-arrow.svg myanmar-liga-after.svg > myanmar-liga.svg


## 5 dist

> Note: Noto Sans Myanmar implements all distance adjustments in
> `kern`.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-dist-before.svg --features=-kern  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=101b,102b,103a,100f,103c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-dist-after.svg --features=+kern  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=101b,102b,103a,100f,103c

svg_stack --direction=h myanmar-dist-before.svg right-arrow.svg myanmar-dist-after.svg > myanmar-dist.svg


## 5 abvm

> Note: Noto Sans Myanmar implements this as `mark`.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-abvm-before.svg --features=-mark  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=1004,103a,1039,1008

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-abvm-after.svg --features=+mark  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=1004,103a,1039,1008

svg_stack --direction=h myanmar-abvm-before.svg right-arrow.svg myanmar-abvm-after.svg > myanmar-abvm.svg


## 5 blwm

> Note: Noto Sans Myanmar implements this as `mark`.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-blwm-before.svg --features=-mark  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=1009,1039,101b

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-blwm-after.svg --features=+mark  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=1009,1039,101b

svg_stack --direction=h myanmar-blwm-before.svg right-arrow.svg myanmar-blwm-after.svg > myanmar-blwm.svg


## 5 mark

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-mark-before.svg --features=-mark  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=107e,108d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-mark-after.svg --features=+mark  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=107e,108d

svg_stack --direction=h myanmar-mark-before.svg right-arrow.svg myanmar-mark-after.svg > myanmar-mark.svg


## 5 mkmk

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-mkmk-before.svg --features=-mkmk  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=1000,1039,105d,105e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=myanmar-mkmk-after.svg --features=+mkmk  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansMyanmar-Regular.ttf --unicodes=1000,1039,105d,105e

svg_stack --direction=h myanmar-mkmk-before.svg right-arrow.svg myanmar-mkmk-after.svg > myanmar-mkmk.svg


================================================
FILE: images/nko/nko-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-nko.md](../../opentype-shaping-nko.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## 4.3 `fina`

> Note: Noto Sans NKo does not have a dotted-circle glyph. These
> images use `U+07fa`, the lajanyalan (N'Ko kashida) in its place.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-fina-before.png --features=-fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07fa,07e5

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-fina-after.png --features=+fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07fa,07e5

montage nko-fina-before.png right-arrow.png nko-fina-after.png -geometry +0+0 -background transparent nko-fina.png


## 4.6 `medi`

> Note: Noto Sans NKo does not have a dotted-circle glyph. These
> images use `U+07fa`, the lajanyalan (N'Ko kashida) in its place.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-medi-before.png --features=-medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07fa,07e8,07fa

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-medi-after.png --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07fa,07e8,07fa

montage nko-medi-before.png right-arrow.png nko-medi-after.png -geometry +0+0 -background transparent nko-medi.png


## 4.8 `init`

> Note: Noto Sans NKo does not have a dotted-circle glyph. These
> images use `U+07fa`, the lajanyalan (N'Ko kashida) in its place.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-init-before.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07da,07fa

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-init-after.png --features=+init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07da,07fa

montage nko-init-before.png right-arrow.png nko-init-after.png -geometry +0+0 -background transparent nko-init.png


## 7.3 `mark`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-mark-before.png --features=-mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07fa,07d5,07f1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-mark-after.png --features=+mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07fa,07d5,07f1

montage nko-mark-before.png right-arrow.png nko-mark-after.png -geometry +0+0 -background transparent nko-mark.png


================================================
FILE: images/nko/nko-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-nko.md](../../opentype-shaping-nko.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## 4.3 `fina`

> Note: Noto Sans NKo does not have a dotted-circle glyph. These
> images use `U+07fa`, the lajanyalan (N'Ko kashida) in its place.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-fina-before.svg --features=-fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07fa,07e5

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-fina-after.svg --features=+fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07fa,07e5

svg_stack --direction=h nko-fina-before.svg right-arrow.svg nko-fina-after.svg > nko-fina.svg


## 4.6 `medi`

> Note: Noto Sans NKo does not have a dotted-circle glyph. These
> images use `U+07fa`, the lajanyalan (N'Ko kashida) in its place.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-medi-before.svg --features=-medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07fa,07e8,07fa

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-medi-after.svg --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07fa,07e8,07fa

svg_stack --direction=h nko-medi-before.svg right-arrow.svg nko-medi-after.svg > nko-medi.svg


## 4.8 `init`

> Note: Noto Sans NKo does not have a dotted-circle glyph. These
> images use `U+07fa`, the lajanyalan (N'Ko kashida) in its place.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-init-before.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07da,07fa

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-init-after.svg --features=+init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07da,07fa

svg_stack --direction=h nko-init-before.svg right-arrow.svg nko-init-after.svg > nko-init.svg


## 7.3 `mark`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-mark-before.svg --features=-mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07fa,07d5,07f1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=nko-mark-after.svg --features=+mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansNKo-Regular.ttf --unicodes=07fa,07d5,07f1

svg_stack --direction=h nko-mark-before.svg right-arrow.svg nko-mark-after.svg > nko-mark.svg


================================================
FILE: images/oriya/oriya-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-oriya.md](../../opentype-shaping-oriya.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-matra-decompose-before.png --features= --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b48

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-matra-decompose-after.png --features= --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b47,25cc,0b56

montage oriya-matra-decompose-before.png right-arrow.png oriya-matra-decompose-after.png -geometry +0+0 -background transparent oriya-matra-decompose.png


## 2.7 Post-base consonants

### Ya

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-pstf-ya-before.png --features=-pstf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=25cc,0b4d,0b2f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-pstf-ya-after.png --features=+pstf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=25cc,0b4d,0b2f

montage oriya-pstf-ya-before.png right-arrow.png oriya-pstf-ya-after.png -geometry +0+0 -background transparent oriya-pstf-ya.png

### Yya

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-pstf-yya-before.png --features=-pstf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=25cc,0b4d,0b5f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-pstf-yya-after.png --features=+pstf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=25cc,0b4d,0b5f

montage oriya-pstf-yya-before.png right-arrow.png oriya-pstf-yya-after.png -geometry +0+0 -background transparent oriya-pstf-yya.png


## 3.2 `nukt`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-nukt-before.png --features=-nukt --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b16,25cc,0b3c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-nukt-after.png --features=+nukt --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b16,0b3c

montage oriya-nukt-before.png right-arrow.png oriya-nukt-after.png -geometry +0+0 -background transparent oriya-nukt.png


## 3.3 `akhn`

### KSsa

> Note: Noto Sans Oriya implements this in a `pres`+`blwf` combination
> for unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-akhn-kssa-before.png --features=-pres,-blwf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b15,0b4d,0b37

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-akhn-kssa-after.png --features=+pres,+blwf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b15,0b4d,0b37

montage oriya-akhn-kssa-before.png right-arrow.png oriya-akhn-kssa-after.png -geometry +0+0 -background transparent oriya-akhn-kssa.png

### JNya

> Note: Noto Sans Oriya implements this in a `blwf`+`cjct` combination
> for unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-akhn-jnya-before.png --features=-pres,-cjct,-blwf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b1c,0b4d,0b1e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-akhn-jnya-after.png --features=+pres,+blwf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b1c,0b4d,0b1e

montage oriya-akhn-jnya-before.png right-arrow.png oriya-akhn-jnya-after.png -geometry +0+0 -background transparent oriya-akhn-jnya.png


## 3.4 `rphf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-rphf-before.png --features=-rphf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b30,0b4d,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-rphf-after.png --features=+rphf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b30,0b4d,25cc

montage oriya-rphf-before.png right-arrow.png oriya-rphf-after.png -geometry +0+0 -background transparent oriya-rphf.png


## 3.7 `blwf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-blwf-before.png --features=-blwf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=25cc,0b4d,0b25

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-blwf-after.png --features=+blwf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=25cc,0b4d,0b25

montage oriya-blwf-before.png right-arrow.png oriya-blwf-after.png -geometry +0+0 -background transparent oriya-blwf.png


## 3.9 `half`

> No examples found.

## 3.10 `pstf`

> Same as 2.7

## 3.12 `cjct`

> Not a perfect example....

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-cjct-before.png --features=-pres --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b38,0b4d,25cc,0b4d,0b2a,0b4d,0b5d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-cjct-after.png --features=+pres --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b38,0b4d,0b2a,0b4d,0b5d

montage oriya-cjct-before.png right-arrow.png oriya-cjct-after.png -geometry +0+0 -background transparent oriya-cjct.png


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-matra-position-before.png --features= --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b47,0b28,0b4d,200d,0b2d,0b4d,0b27,0b57

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-matra-position-after.png --features= --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b28,0b4d,200d,0b2d,0b4d,0b27,0b4c

montage oriya-matra-position-before.png right-arrow.png oriya-matra-position-after.png -geometry +0+0 -background transparent oriya-matra-position.png


## 4.3 Reph position

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-reph-position-before.png --features= --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b30,0b4d,25cc,0b2a,0b4d,0b2a,0b4d,0b26,0b4d,0b2f,0b3e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-reph-position-after.png --features= --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b30,0b4d,0b2a,0b4d,0b2a,0b4d,0b26,0b4d,0b2f,0b3e

montage oriya-reph-position-before.png right-arrow.png oriya-reph-position-after.png -geometry +0+0 -background transparent oriya-reph-position.png


## 5 `pres`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-pres-before.png --features=-pres --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b2e,0b4d,0b2d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-pres-after.png --features=+pres --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b2e,0b4d,0b2d

montage oriya-pres-before.png right-arrow.png oriya-pres-after.png -geometry +0+0 -background transparent oriya-pres.png


## 5 `abvs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-abvs-before.png --features=-abvs --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b13,200d,0b01

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-abvs-after.png --features=+abvs --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b13,200d,0b01

montage oriya-abvs-before.png right-arrow.png oriya-abvs-after.png -geometry +0+0 -background transparent oriya-abvs.png


## 5 `blws`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-blws-before.png --features=-blws --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b28,0b4d,0b24,0b42

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-blws-after.png --features=+blws --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b28,0b4d,0b24,0b42

montage oriya-blws-before.png right-arrow.png oriya-blws-after.png -geometry +0+0 -background transparent oriya-blws.png


## 5 `psts`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-psts-before.png --features=-psts --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b23,0b4c,0b01

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-psts-after.png --features=+psts --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b23,0b4c,0b01

montage oriya-psts-before.png right-arrow.png oriya-psts-after.png -geometry +0+0 -background transparent oriya-psts.png


## 5 `haln`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-haln-before.png --features=-haln,-blwm --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b1d,0b4d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-haln-after.png --features=+haln,+blwm --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b1d,0b4d

montage oriya-haln-before.png right-arrow.png oriya-haln-after.png -geometry +0+0 -background transparent oriya-haln.png


## 6 `abvm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-abvm-before.png --features=-abvm --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b19,0b4d,0b18,0b48

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-abvm-after.png --features=+abvm --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b19,0b4d,0b18,0b48

montage oriya-abvm-before.png right-arrow.png oriya-abvm-after.png -geometry +0+0 -background transparent oriya-abvm.png


## 6 `blwm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-blwm-before.png --features=-blwm --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b2e,0b4d,0b2b,0b44

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-blwm-after.png --features=+blwm --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b2e,0b4d,0b2b,0b44

montage oriya-blwm-before.png right-arrow.png oriya-blwm-after.png -geometry +0+0 -background transparent oriya-blwm.png


================================================
FILE: images/oriya/oriya-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-oriya.md](../../opentype-shaping-oriya.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-matra-decompose-before.svg --features= --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b4c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-matra-decompose-after.svg --features= --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b47,25cc,0b57

svg_stack.py --direction=h oriya-matra-decompose-before.svg right-arrow.svg oriya-matra-decompose-after.svg > oriya-matra-decompose.svg


## 2.7 Below-base consonants

### Ra

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-blwf-ra-before.svg --features=-pstf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=25cc,0b4d,0b30

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-blwf-ra-after.svg --features=+pstf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=25cc,0b4d,0b30

svg_stack.py --direction=h oriya-blwf-ra-before.svg right-arrow.svg oriya-blwf-ra-after.svg > oriya-blwf-ra.svg


#### Duplicates for other subsections

cp oriya-blwf-ra.svg oriya-blwf-ra-1.svg

cluster_styles = [

cp oriya-blwf-ra.svg oriya-blwf-ra-2.svg

cluster_styles = [


## 2.7 Post-base consonants

### Ya

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-pstf-ya-before.svg --features=-pstf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=25cc,0b4d,0b2f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-pstf-ya-after.svg --features=+pstf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=25cc,0b4d,0b2f

svg_stack.py --direction=h oriya-pstf-ya-before.svg right-arrow.svg oriya-pstf-ya-after.svg > oriya-pstf-ya.svg

### Yya

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-pstf-yya-before.svg --features=-pstf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=25cc,0b4d,0b5f

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-pstf-yya-after.svg --features=+pstf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=25cc,0b4d,0b5f

svg_stack.py --direction=h oriya-pstf-yya-before.svg right-arrow.svg oriya-pstf-yya-after.svg > oriya-pstf-yya.svg


#### Duplicates for other subsections

cp oriya-pstf-ya.svg oriya-pstf-ya-1.svg

cluster_styles = [


cp oriya-pstf-yya.svg oriya-pstf-yya-1.svg

cluster_styles = [


## 3.2 `nukt`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-nukt-before.svg --features=-nukt --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b16,25cc,0b3c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-nukt-after.svg --features=+nukt --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b16,0b3c

svg_stack.py --direction=h oriya-nukt-before.svg right-arrow.svg oriya-nukt-after.svg > oriya-nukt.svg


## 3.3 `akhn`

### KSsa

> Note: Noto Serif Oriya implements this in a `pres`+`blwf` combination
> for unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-akhn-kssa-before.svg --features=-pres,-blwf,-akhn,-haln --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b15,0b4d,0b37

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-akhn-kssa-after.svg --features=+pres,+blwf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b15,0b4d,0b37

svg_stack.py --direction=h oriya-akhn-kssa-before.svg right-arrow.svg oriya-akhn-kssa-after.svg > oriya-akhn-kssa.svg

### JNya

> Note: Noto Serif Oriya implements this in a `blwf`+`cjct` combination
> for unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-akhn-jnya-before.svg --features=-pres,-cjct,-blwf,-haln --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b1c,0b4d,0b1e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-akhn-jnya-after.svg --features=+pres,+blwf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b1c,0b4d,0b1e

svg_stack.py --direction=h oriya-akhn-jnya-before.svg right-arrow.svg oriya-akhn-jnya-after.svg > oriya-akhn-jnya.svg


## 3.4 `rphf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-rphf-before.svg --features=-rphf,-haln --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b30,0b4d,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-rphf-after.svg --features=+rphf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b30,0b4d,25cc

svg_stack.py --direction=h oriya-rphf-before.svg right-arrow.svg oriya-rphf-after.svg > oriya-rphf.svg


## 3.7 `blwf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-blwf-before.svg --features=-blwf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=25cc,0b4d,0b25

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-blwf-after.svg --features=+blwf --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=25cc,0b4d,0b25

svg_stack.py --direction=h oriya-blwf-before.svg right-arrow.svg oriya-blwf-after.svg > oriya-blwf.svg


## 3.9 `half`

> No examples found.

## 3.10 `pstf`

> Same as 2.7

## 3.12 `cjct`

> Not a perfect example....
> Noto Serif Oriya implements this in a combination of multiple
> features, including akhn and blwf. It also applies haln, which must
> be deactivated in this illustration because it is documented as
> being applied later.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-cjct-before.svg --features=-blwf,-akhn,-cjct --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b38,0b4d,0b2a,0b40

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-cjct-after.svg --features=+cjct --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b38,0b4d,0b2a,0b40

svg_stack.py --direction=h oriya-cjct-before.svg right-arrow.svg oriya-cjct-after.svg > oriya-cjct.svg


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-matra-position-before.svg --features= --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b47,0b36,0b4d,0b24,0b4d,0b30,0b4d,0b2f,0b56

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-matra-position-after.svg --features= --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b36,0b4d,0b24,0b4d,0b30,0b4d,0b2f,0b48

svg_stack.py --direction=h oriya-matra-position-before.svg right-arrow.svg oriya-matra-position-after.svg > oriya-matra-position.svg


## 4.3 Reph position

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-reph-position-before.svg --features= --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b30,0b4d,25cc,0b2a,0b4d,0b27,0b4d,0b30,0b4d,0b2f,0b3e,0b41

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-reph-position-after.svg --features= --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b30,0b4d,0b2a,0b4d,0b27,0b4d,0b30,0b4d,0b2f,0b3e,0b41

svg_stack.py --direction=h oriya-reph-position-before.svg right-arrow.svg oriya-reph-position-after.svg > oriya-reph-position.svg


## 5 `pres`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-pres-before.svg --features=-pres --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b2e,0b4d,0b2d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-pres-after.svg --features=+pres --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b2e,0b4d,0b2d

svg_stack.py --direction=h oriya-pres-before.svg right-arrow.svg oriya-pres-after.svg > oriya-pres.svg


## 5 `abvs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-abvs-before.svg --features=-abvs --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b13,200d,0b01

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-abvs-after.svg --features=+abvs --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b13,200d,0b01

svg_stack.py --direction=h oriya-abvs-before.svg right-arrow.svg oriya-abvs-after.svg > oriya-abvs.svg


## 5 `blws`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-blws-before.svg --features=-blws --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b28,0b4d,0b24,0b42

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-blws-after.svg --features=+blws --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b28,0b4d,0b24,0b42

svg_stack.py --direction=h oriya-blws-before.svg right-arrow.svg oriya-blws-after.svg > oriya-blws.svg


## 5 `psts`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-psts-before.svg --features=-psts --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b23,0b4c,0b01

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-psts-after.svg --features=+psts --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b23,0b4c,0b01

svg_stack.py --direction=h oriya-psts-before.svg right-arrow.svg oriya-psts-after.svg > oriya-psts.svg


## 5 `haln`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-haln-before.svg --features=-haln,-blwm --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b1d,0b4d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-haln-after.svg --features=+haln,+blwm --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b1d,0b4d

svg_stack.py --direction=h oriya-haln-before.svg right-arrow.svg oriya-haln-after.svg > oriya-haln.svg


## 6 `dist`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-dist-before.svg --features=-dist --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b2e,0b42,0b15

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-dist-after.svg --features=+dist --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b2e,0b42,0b15

svg_stack.py --direction=h oriya-dist-before.svg right-arrow.svg oriya-dist-after.svg > oriya-dist.svg


## 6 `abvm`

> Note: Noto Serif Oriya implements this as `blwm` for unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-abvm-before.svg --features=-abvm,-blwm --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b19,0b48

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-abvm-after.svg --features=+abvm,+blwm --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b19,0b48

svg_stack.py --direction=h oriya-abvm-before.svg right-arrow.svg oriya-abvm-after.svg > oriya-abvm.svg


## 6 `blwm`

> Note: Noto Serif Oriya implements this as `abvm` for unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-blwm-before.svg --features=-abvm,-blwm --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b2e,0b4d,0b2b,0b44

hb-view --font-size=110 --margin=2,16,2,16 --output-file=oriya-blwm-after.svg --features=+blwm --background=FFFFFF00 /home/nate/SyncThing/fonts-external/temporary-and-testing/NotoSerifOriya-Regular.ttf --unicodes=0b2e,0b4d,0b2b,0b44

svg_stack.py --direction=h oriya-blwm-before.svg right-arrow.svg oriya-blwm-after.svg > oriya-blwm.svg


================================================
FILE: images/sinhala/sinhala-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-sinhala.md](../../opentype-shaping-sinhala.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-matra-decompose-before.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dda

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-matra-decompose-after.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=25cc,0dd9,25cc,0dca

montage sinhala-matra-decompose-before.png right-arrow.png sinhala-matra-decompose-after.png -geometry +0+0 -background transparent sinhala-matra-decompose.png


## 2.7 Post-base consonants

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-vatu-va-before.png --features=-vatu --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=25cc,0dca,200d,0dba

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-vatu-va-after.png --features=+vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=25cc,0dca,200d,0dba

montage sinhala-vatu-va-before.png right-arrow.png sinhala-vatu-va-after.png -geometry +0+0 -background transparent sinhala-vatu-va.png


## 3.3 `akhn`

### Ligature

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-akhn-ligature-before.png --features=-akhn --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0d9a,25cc,0dca,200d,0dc2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-akhn-ligature-after.png --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0d9a,0dca,200d,0dc2

montage sinhala-akhn-ligature-before.png right-arrow.png sinhala-akhn-ligature-after.png -geometry +0+0 -background transparent sinhala-akhn-ligature.png

### Touching

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-akhn-touching-before.png --features=-akhn,-pres --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0d9c,200d,25cc,0dca,0d9d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-akhn-touching-after.png --features=+akhn,+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0d9c,200d,0dca,0d9d

montage sinhala-akhn-touching-before.png right-arrow.png sinhala-akhn-touching-after.png -geometry +0+0 -background transparent sinhala-akhn-touching.png


## 3.4 `rphf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-rphf-before.png --features=-rphf --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dbb,0dca,200d,25cc

hb-view --font-size=110 --margin=2,16,2,64 --output-file=sinhala-rphf-after.png --features=+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dbb,0dca,200d,25cc

montage sinhala-rphf-before.png right-arrow.png sinhala-rphf-after.png -geometry +0+0 -background transparent sinhala-rphf.png


## 3.10 `pstf`

> Not needed?

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-pstf-before.png --features=-pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dde

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-pstf-after.png --features=+pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0ddf

montage sinhala-pstf-before.png right-arrow.png sinhala-pstf-after.png -geometry +0+0 -background transparent sinhala-pstf.png


## 3.11 `vatu`

### Ra

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-vatu-ra-before.png --features=-vatu --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=25cc,0dca,200d,0dbb

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-vatu-ra-after.png --features=+vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=25cc,0dca,200d,0dbb

montage sinhala-vatu-ra-before.png right-arrow.png sinhala-vatu-ra-after.png -geometry +0+0 -background transparent sinhala-vatu-ra.png

### Va

> Same as 2.7


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-matra-position-before.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=25cc,0dd9,0da0,0dca,0db1,0dca,200d,0daf,0dca,200d,0dbb,0dcf

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-matra-position-after.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0da0,0dca,0db1,0dca,200d,0daf,0dca,200d,0dbb,0ddc

montage sinhala-matra-position-before.png right-arrow.png sinhala-matra-position-after.png -geometry +0+0 -background transparent sinhala-matra-position.png


## 4.3 Reph position

hb-view --font-size=110 --margin=2,16,2,64 --output-file=sinhala-reph-position-before.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dbb,0dca,200d,25cc,0dad,0dca,200d,0dae,0dca,200d,0dba,0dd1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-reph-position-after.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dbb,0dca,200d,0dad,0dca,200d,0dae,0dca,200d,0dba,0dd1

montage sinhala-reph-position-before.png right-arrow.png sinhala-reph-position-after.png -geometry +0+0 -background transparent sinhala-reph-position.png


## 5 `pres`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-pres-before.png --features=-pres --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0d9b,200d,25cc,0dca,0da2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-pres-after.png --features=+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0d9b,200d,0dca,0da2

montage sinhala-pres-before.png right-arrow.png sinhala-pres-after.png -geometry +0+0 -background transparent sinhala-pres.png


## 5 `abvs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-abvs-before.png --features=-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0db6,0dd3

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-abvs-after.png --features=+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0db6,0dd3

montage sinhala-abvs-before.png right-arrow.png sinhala-abvs-after.png -geometry +0+0 -background transparent sinhala-abvs.png


## 5 `blws`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-blws-before.png --features=-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0db7,0dd6

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-blws-after.png --features=+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0db7,0dd6

montage sinhala-blws-before.png right-arrow.png sinhala-blws-after.png -geometry +0+0 -background transparent sinhala-blws.png


## 5 `psts`

> Note: this lookup only works in Noto Sans. Needs more investigation.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-psts-before.png --features=-psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSinhala-Regular.ttf --unicodes=0daf,0dca,200d,0dba,0ddd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-psts-after.png --features=+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSinhala-Regular.ttf --unicodes=0daf,0dca,200d,0dba,0ddd

montage sinhala-psts-before.png right-arrow.png sinhala-psts-after.png -geometry +0+0 -background transparent sinhala-psts.png


## 6 `abvm`

> Note: Noto Serif Sinhala implements this as an `abvs` substitution
> for unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-abvm-before.png --features=-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dbb,0dca,200d,0dae

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-abvm-after.png --features=+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dbb,0dca,200d,0dae

montage sinhala-abvm-before.png right-arrow.png sinhala-abvm-after.png -geometry +0+0 -background transparent sinhala-abvm.png


## 6 `blwm`

> Note: Noto Serif Sinhala double-implements this in both `blwm` and
> `abvs`, even though it is clearly not above-base.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-blwm-before.png --features=-blwm,-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0d9f,0dca,200d,0dbb

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-blwm-after.png --features=+blwm,+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0d9f,0dca,200d,0dbb

montage sinhala-blwm-before.png right-arrow.png sinhala-blwm-after.png -geometry +0+0 -background transparent sinhala-blwm.png


================================================
FILE: images/sinhala/sinhala-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-sinhala.md](../../opentype-shaping-sinhala.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-matra-decompose-before.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dda

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-matra-decompose-after.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=25cc,0dd9,25cc,0dca

svg_stack.py --direction=h sinhala-matra-decompose-before.svg right-arrow.svg sinhala-matra-decompose-after.svg > sinhala-matra-decompose.svg


## 2.7 Post-base consonants

### Ra

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-vatu-ra-before.svg --features=-vatu --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=25cc,0dca,200d,0dbb

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-vatu-ra-after.svg --features=+vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=25cc,0dca,200d,0dbb

svg_stack.py --direction=h sinhala-vatu-ra-before.svg right-arrow.svg sinhala-vatu-ra-after.svg > sinhala-vatu-ra.svg

### Va

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-vatu-va-before.svg --features=-vatu --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=25cc,0dca,200d,0dba

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-vatu-va-after.svg --features=+vatu --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=25cc,0dca,200d,0dba

svg_stack.py --direction=h sinhala-vatu-va-before.svg right-arrow.svg sinhala-vatu-va-after.svg > sinhala-vatu-va.svg


#### Duplicates for other subsections

cp sinhala-vatu-ra.svg sinhala-vatu-ra-1.svg

cluster_styles = [


cp sinhala-vatu-va.svg sinhala-vatu-va-1.svg

cluster_styles = [


## 3.3 `akhn`

### Ligature

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-akhn-ligature-before.svg --features=-akhn --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0d9a,25cc,0dca,200d,0dc2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-akhn-ligature-after.svg --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0d9a,0dca,200d,0dc2

svg_stack.py --direction=h sinhala-akhn-ligature-before.svg right-arrow.svg sinhala-akhn-ligature-after.svg > sinhala-akhn-ligature.svg

### Touching

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-akhn-touching-before.svg --features=-akhn,-pres --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0d9c,200d,25cc,0dca,0d9d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-akhn-touching-after.svg --features=+akhn,+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0d9c,200d,0dca,0d9d

svg_stack.py --direction=h sinhala-akhn-touching-before.svg right-arrow.svg sinhala-akhn-touching-after.svg > sinhala-akhn-touching.svg


## 3.4 `rphf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-rphf-before.svg --features=-rphf --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dbb,0dca,200d,25cc

hb-view --font-size=110 --margin=2,16,2,64 --output-file=sinhala-rphf-after.svg --features=+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dbb,0dca,200d,25cc

svg_stack.py --direction=h sinhala-rphf-before.svg right-arrow.svg sinhala-rphf-after.svg > sinhala-rphf.svg


#### Duplicates for other subsections

cp sinhala-rphf.svg sinhala-rphf-1.svg

cluster_styles = [


## 3.10 `pstf`

> Not needed?

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-pstf-before.svg --features=-pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dde

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-pstf-after.svg --features=+pstf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0ddf

svg_stack.py --direction=h sinhala-pstf-before.svg right-arrow.svg sinhala-pstf-after.svg > sinhala-pstf.svg


## 3.11 `vatu`

> Same as 2.7


### Va

> Same as 2.7


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-matra-position-before.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=25cc,0dd9,0da0,0dca,0db1,0dca,200d,0daf,0dca,200d,0dbb,0dcf

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-matra-position-after.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0da0,0dca,0db1,0dca,200d,0daf,0dca,200d,0dbb,0ddc

svg_stack.py --direction=h sinhala-matra-position-before.svg right-arrow.svg sinhala-matra-position-after.svg > sinhala-matra-position.svg


## 4.3 Reph position

hb-view --font-size=110 --margin=2,16,2,64 --output-file=sinhala-reph-position-before.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dbb,0dca,200d,25cc,0dad,0dca,200d,0dae,0dca,200d,0dba,0dd1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-reph-position-after.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dbb,0dca,200d,0dad,0dca,200d,0dae,0dca,200d,0dba,0dd1

svg_stack.py --direction=h sinhala-reph-position-before.svg right-arrow.svg sinhala-reph-position-after.svg > sinhala-reph-position.svg


## 5 `pres`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-pres-before.svg --features=-pres --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0d9b,200d,25cc,0dca,0da2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-pres-after.svg --features=+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0d9b,200d,0dca,0da2

svg_stack.py --direction=h sinhala-pres-before.svg right-arrow.svg sinhala-pres-after.svg > sinhala-pres.svg


## 5 `abvs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-abvs-before.svg --features=-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0db6,0dd3

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-abvs-after.svg --features=+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0db6,0dd3

svg_stack.py --direction=h sinhala-abvs-before.svg right-arrow.svg sinhala-abvs-after.svg > sinhala-abvs.svg


## 5 `blws`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-blws-before.svg --features=-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0db7,0dd6

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-blws-after.svg --features=+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0db7,0dd6

svg_stack.py --direction=h sinhala-blws-before.svg right-arrow.svg sinhala-blws-after.svg > sinhala-blws.svg


## 5 `psts`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-psts-before.svg --features=-psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dbb,0dd1

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-psts-after.svg --features=+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dbb,0dd1

svg_stack.py --direction=h sinhala-psts-before.svg right-arrow.svg sinhala-psts-after.svg > sinhala-psts.svg


## 6 `dist`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-dist-before.svg --features=-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dc5,0ddf

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-dist-after.svg --features=+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dc5,0ddf

svg_stack.py --direction=h sinhala-dist-before.svg right-arrow.svg sinhala-dist-after.svg > sinhala-dist.svg


## 6 `abvm`

> Note: Noto Serif Sinhala implements this as an `abvs`
> substitution. This makes it a less-than-ideal illustration, because
> the "after" SVG is a ligated glyph; it must suffice until a suitable
> alternative is found.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-abvm-before.svg --features=-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dc6,0dd3

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-abvm-after.svg --features=+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0dc6,0dd3

svg_stack.py --direction=h sinhala-abvm-before.svg right-arrow.svg sinhala-abvm-after.svg > sinhala-abvm.svg


## 6 `blwm`

> Note: Noto Serif Sinhala double-implements this as a `blws`
> substitution. This makes it a less-than-ideal illustration, because
> the "after" SVG is a ligated glyph; it must suffice until a suitable
> alternative is found.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-blwm-before.svg --features=-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0da7,0dca,200d,0da8,0dd4

hb-view --font-size=110 --margin=2,16,2,16 --output-file=sinhala-blwm-after.svg --features=+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifSinhala-Regular.ttf --unicodes=0da7,0dca,200d,0da8,0dd4

svg_stack.py --direction=h sinhala-blwm-before.svg right-arrow.svg sinhala-blwm-after.svg > sinhala-blwm.svg


================================================
FILE: images/syriac/syriac-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-syriac.md](../../opentype-shaping-syriac.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## Dalath Rish group ##

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-dalath-rish.png --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEastern-Regular.ttf --unicodes=0715,072a,0716


## 3. `stch`

> Note: Noto seems to implement this in a set of `calt` substitutions,
> for unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-stch-before.png --features=-stch,-calt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEastern-Regular.ttf --unicodes=0712,0732,070f,0728,0721,0735,0710

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-stch-after.png --features=+stch --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEastern-Regular.ttf --unicodes=0712,0732,070f,0728,0721,0735,0710

montage syriac-stch-before.png right-arrow.png syriac-stch-after.png -geometry +0+0 -background transparent syriac-stch.png


## 4.1 `locl`

> Note: None found in Noto fonts.


## 4.2 `isol`

> Note: none found in Noto fonts.


## 4.3 `fina`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-fina-before.png --features=-fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=25cc,0722

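hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-fina-after.png --features=+fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=25cc,0722

montage syriac-fina-before.png right-arrow.png syriac-fina-after.png -geometry +0+0 -background transparent syriac-fina.png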


## 4.4 `fin2`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-fin2-before.png --features=-fin2 --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=0717,0710

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-fin2-after.png --features=+fin2 --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=0717,0710

montage syriac-fin2-before.png right-arrow.png syriac-fin2-after.png -geometry +0+0 -background transparent syriac-fin2.png


## 4.5 `fin3`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-fin3-before.png --features=-fin3 --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=072f,0710

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-fin3-after.png --features=+fin3 --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=072f,0710

montage syriac-fin3-before.png right-arrow.png syriac-fin3-after.png -geometry +0+0 -background transparent syriac-fin3.png


## 4.6 `medi`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-medi-before.png --features=-medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=25cc,0724,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-medi-after.png --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=25cc,0724,25cc

montage syriac-medi-before.png right-arrow.png syriac-medi-after.png -geometry +0+0 -background transparent syriac-medi.png


## 4.7 `med2`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-med2-before.png --features=-med2 --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=25cc,0710,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-med2-after.png --features=+med2 --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=25cc,0710,25cc

montage syriac-med2-before.png right-arrow.png syriac-med2-after.png -geometry +0+0 -background transparent syriac-med2.png


## 4.8 `init`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-init-before.png --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEstrangela-Regular.ttf --unicodes=0721,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-init-after.png --features=+init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEstrangela-Regular.ttf --unicodes=0721,25cc

montage syriac-init-before.png right-arrow.png syriac-init-after.png -geometry +0+0 -background transparent syriac-init.png


## 4.9 `rlig`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-rlig-before.png --features=-rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=072a,25cc,0308

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-rlig-after.png --features=+rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=072a,0308

montage syriac-rlig-before.png right-arrow.png syriac-rlig-after.png -geometry +0+0 -background transparent syriac-rlig.png


## 4.11 `calt`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-calt-before.png --features=-calt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=0720,071c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-calt-after.png --features=+calt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=0720,071c

montage syriac-calt-before.png right-arrow.png syriac-calt-after.png -geometry +0+0 -background transparent syriac-calt.png


## 5.1 `liga`

> Note: Noto Syriac implements this as a `calt` lookup for unknown reasons.
>
> This seems to be a known shortcoming. See
> [https://github.com/googlei18n/noto-fonts/issues/665](https://github.com/googlei18n/noto-fonts/issues/665)
> for more information.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-liga-before.png --features=-calt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEstrangela-Regular.ttf --unicodes=0720,0710

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-liga-after.png --features=+calt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEstrangela-Regular.ttf --unicodes=0720,0710

montage syriac-liga-before.png right-arrow.png syriac-liga-after.png -geometry +0+0 -background transparent syriac-liga.png


## 5.2 `dlig`

> Note: none found in Noto Syriac.
>
> This seems to be a known shortcoming. See
> [https://github.com/googlei18n/noto-fonts/issues/665](https://github.com/googlei18n/noto-fonts/issues/665)
> for more information.


## 7.3 `mark`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-mark-before.png --features=-mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEastern-Regular.ttf --unicodes=0712,0733

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-mark-after.png --features=+mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEastern-Regular.ttf --unicodes=0712,0733

montage syriac-mark-before.png right-arrow.png syriac-mark-after.png -geometry +0+0 -background transparent syriac-mark.png


## 7.4 `mkmk`

> Note: All of the Noto Sans Syriac fonts include a `mkmk` feature,
> but it does not seem to work.


================================================
FILE: images/syriac/syriac-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-syriac.md](../../opentype-shaping-syriac.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## Dalath Rish group ##

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-dalath-rish.svg --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEastern-Regular.ttf --unicodes=0715,072a,0716


## 3. `stch`

> Note: Noto seems to implement this in a set of `calt` substitutions,
> for unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-stch-before.svg --features=-stch,-calt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEastern-Regular.ttf --unicodes=0712,0732,070f,0728,0721,0735,0710

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-stch-after.svg --features=+stch --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEastern-Regular.ttf --unicodes=0712,0732,070f,0728,0721,0735,0710

svg_stack --direction=h syriac-stch-before.svg right-arrow.svg syriac-stch-after.svg > syriac-stch.svg


## 4.1 `locl`

> Note: None found in Noto fonts.


## 4.2 `isol`

> Note: none found in Noto fonts.


## 4.3 `fina`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-fina-before.svg --features=-fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=25cc,0722

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-fina-after.svg --features=+fina --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=25cc,0722

svg_stack --direction=h syriac-fina-before.svg right-arrow.svg syriac-fina-after.svg > syriac-fina.svg


## 4.4 `fin2`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-fin2-before.svg --features=-fin2 --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=0717,0710

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-fin2-after.svg --features=+fin2 --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=0717,0710

svg_stack --direction=h syriac-fin2-before.svg right-arrow.svg syriac-fin2-after.svg > syriac-fin2.svg


## 4.5 `fin3`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-fin3-before.svg --features=-fin3 --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=072f,0710

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-fin3-after.svg --features=+fin3 --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=072f,0710

svg_stack --direction=h syriac-fin3-before.svg right-arrow.svg syriac-fin3-after.svg > syriac-fin3.svg


## 4.6 `medi`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-medi-before.svg --features=-medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=25cc,0724,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-medi-after.svg --features=+medi --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=25cc,0724,25cc

svg_stack --direction=h syriac-medi-before.svg right-arrow.svg syriac-medi-after.svg > syriac-medi.svg


## 4.7 `med2`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-med2-before.svg --features=-med2 --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=25cc,0710,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-med2-after.svg --features=+med2 --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=25cc,0710,25cc

svg_stack --direction=h syriac-med2-before.svg right-arrow.svg syriac-med2-after.svg > syriac-med2.svg


## 4.8 `init`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-init-before.svg --features=-init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEstrangela-Regular.ttf --unicodes=0721,25cc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-init-after.svg --features=+init --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEstrangela-Regular.ttf --unicodes=0721,25cc

svg_stack --direction=h syriac-init-before.svg right-arrow.svg syriac-init-after.svg > syriac-init.svg


## 4.9 `rlig`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-rlig-before.svg --features=-rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=072a,25cc,0308

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-rlig-after.svg --features=+rlig --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=072a,0308

svg_stack --direction=h syriac-rlig-before.svg right-arrow.svg syriac-rlig-after.svg > syriac-rlig.svg


## 4.11 `calt`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-calt-before.svg --features=-calt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=0720,071c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-calt-after.svg --features=+calt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacWestern-Regular.ttf --unicodes=0720,071c

svg_stack --direction=h syriac-calt-before.svg right-arrow.svg syriac-calt-after.svg > syriac-calt.svg


## 5.1 `liga`

> Note: Noto Syriac implements this as a `calt` lookup for unknown reasons.
>
> This seems to be a known shortcoming. See
> [https://github.com/googlei18n/noto-fonts/issues/665](https://github.com/googlei18n/noto-fonts/issues/665)
> for more information.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-liga-before.svg --features=-calt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEstrangela-Regular.ttf --unicodes=0720,0710

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-liga-after.svg --features=+calt --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEstrangela-Regular.ttf --unicodes=0720,0710

svg_stack --direction=h syriac-liga-before.svg right-arrow.svg syriac-liga-after.svg > syriac-liga.svg


## 5.2 `dlig`

> Note: none found in Noto Syriac.
>
> This seems to be a known shortcoming. See
> [https://github.com/googlei18n/noto-fonts/issues/665](https://github.com/googlei18n/noto-fonts/issues/665)
> for more information.


## 7.3 `mark`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-mark-before.svg --features=-mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEastern-Regular.ttf --unicodes=0712,0733

hb-view --font-size=110 --margin=2,16,2,16 --output-file=syriac-mark-after.svg --features=+mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansSyriacEastern-Regular.ttf --unicodes=0712,0733

svg_stack --direction=h syriac-mark-before.svg right-arrow.svg syriac-mark-after.svg > syriac-mark.svg


## 7.4 `mkmk`

> Note: All of the Noto Sans Syriac fonts include a `mkmk` feature,
> but it does not seem to work.


================================================
FILE: images/tamil/tamil-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-tamil.md](../../opentype-shaping-tamil.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-matra-decompose-before.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bcc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-matra-decompose-after.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bc6,25cc,0bd7

montage tamil-matra-decompose-before.png right-arrow.png tamil-matra-decompose-after.png -geometry +0+0 -background transparent tamil-matra-decompose.png


## 3.2 `nukt`

> None found.


## 3.3 `akhn`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-akhn-kssa-before.png --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0b95,0bcd,0bb7

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-akhn-kssa-after.png --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0b95,0bcd,0bb7

montage tamil-akhn-kssa-before.png right-arrow.png tamil-akhn-kssa-after.png -geometry +0+0 -background transparent tamil-akhn-kssa.png


## 3.9 `half`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-half-before.png --features=-half,-haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTamil-Regular.ttf --unicodes=0b99,25cc,0bcd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-half-after.png --features=+half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTamil-Regular.ttf --unicodes=0b99,0bcd

montage tamil-half-before.png right-arrow.png tamil-half-after.png -geometry +0+0 -background transparent tamil-half.png


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-matra-position-before.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bc6,0bb0,0bcd,0b9a,0bcd,0b9c,0bbe

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-matra-position-after.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bb0,0bcd,0b9a,0bcd,0b9c,0bca

montage tamil-matra-position-before.png right-arrow.png tamil-matra-position-after.png -geometry +0+0 -background transparent tamil-matra-position.png


## 5 `pres`

> Note: Noto Serif Tamil implements this as an `akhn` feature for
> unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-pres-before.png --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bb6,0bcd,0bb0,0bc0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-pres-after.png --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bb6,0bcd,0bb0,0bc0

montage tamil-pres-before.png right-arrow.png tamil-pres-after.png -geometry +0+0 -background transparent tamil-pres.png


## 5 `abvs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-abvs-before.png --features=-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTamil-Regular.ttf --unicodes=0baf,0bc0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-abvs-after.png --features=+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTamil-Regular.ttf --unicodes=0baf,0bc0

montage tamil-abvs-before.png right-arrow.png tamil-abvs-after.png -geometry +0+0 -background transparent tamil-abvs.png


## 5 `blws`

> None found.


## 5 `psts`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-psts-before.png --features=-psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bb4,0bc2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-psts-after.png --features=+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bb4,0bc2

montage tamil-psts-before.png right-arrow.png tamil-psts-after.png -geometry +0+0 -background transparent tamil-psts.png


## `haln`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-haln-before.png --features=-haln,-half,-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTamil-Regular.ttf --unicodes=0b9e,0bcd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-haln-after.png --features=+haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTamil-Regular.ttf --unicodes=0b9e,0bcd

montage tamil-haln-before.png right-arrow.png tamil-haln-after.png -geometry +0+0 -background transparent tamil-haln.png


## 6 `abvm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-abvm-before.png --features=-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bb9,0bcd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-abvm-after.png --features=+abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bb9,0bcd

montage tamil-abvm-before.png right-arrow.png tamil-abvm-after.png -geometry +0+0 -background transparent tamil-abvm.png


## 6 `blwm`

> None found.


================================================
FILE: images/tamil/tamil-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-tamil.md](../../opentype-shaping-tamil.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-matra-decompose-before.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bcc

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-matra-decompose-after.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bc6,25cc,0bd7

svg_stack.py --direction=h tamil-matra-decompose-before.svg right-arrow.svg tamil-matra-decompose-after.svg > tamil-matra-decompose.svg


## 3.2 `nukt`

> None found. Testing with Grantha Nukta.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-nukt-before.svg --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bf5,25cc,1133c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-nukt-after.svg --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bf5,1133c

svg_stack.py --direction=h tamil-nukt-before.svg right-arrow.svg tamil-nukt-after.svg > tamil-nukt.svg


## 3.3 `akhn`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-akhn-kssa-before.svg --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0b95,0bcd,0bb7

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-akhn-kssa-after.svg --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0b95,0bcd,0bb7

svg_stack.py --direction=h tamil-akhn-kssa-before.svg right-arrow.svg tamil-akhn-kssa-after.svg > tamil-akhn-kssa.svg


## 3.9 `half`

> Simulated output using a `mark` lookup; no example found.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-half-before.svg --features=-half,-mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTamil-Regular.ttf --unicodes=0b99,25cc,0bcd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-half-after.svg --features=+half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTamil-Regular.ttf --unicodes=0b99,0bcd

svg_stack.py --direction=h tamil-half-before.svg right-arrow.svg tamil-half-after.svg > tamil-half.svg


## 4.2 Pre-base matras

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-matra-position-before.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bc6,0bb0,0bcd,0b9a,0bcd,0b9c,0bbe

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-matra-position-after.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bb0,0bcd,0b9a,0bcd,0b9c,0bca

svg_stack.py --direction=h tamil-matra-position-before.svg right-arrow.svg tamil-matra-position-after.svg > tamil-matra-position.svg


## 5 `pres`

> Note: Noto Serif Tamil implements this as an `akhn` feature for
> unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-pres-before.svg --features=-akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bb6,0bcd,0bb0,0bc0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-pres-after.svg --features=+akhn --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bb6,0bcd,0bb0,0bc0

svg_stack.py --direction=h tamil-pres-before.svg right-arrow.svg tamil-pres-after.svg > tamil-pres.svg


## 5 `abvs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-abvs-before.svg --features=-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTamil-Regular.ttf --unicodes=0baf,0bc0

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-abvs-after.svg --features=+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTamil-Regular.ttf --unicodes=0baf,0bc0

svg_stack.py --direction=h tamil-abvs-before.svg right-arrow.svg tamil-abvs-after.svg > tamil-abvs.svg


## 5 `blws`

> None found.


## 5 `psts`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-psts-before.svg --features=-psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bb4,0bc2

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-psts-after.svg --features=+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bb4,0bc2

svg_stack.py --direction=h tamil-psts-before.svg right-arrow.svg tamil-psts-after.svg > tamil-psts.svg


## `haln`

> Simulated output using a `mark` lookup; no example found.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-haln-before.svg --features=-haln,-mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTamil-Regular.ttf --unicodes=0b9e,25cc,0bcd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-haln-after.svg --features=+haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTamil-Regular.ttf --unicodes=0b9e,0bcd

svg_stack.py --direction=h tamil-haln-before.svg right-arrow.svg tamil-haln-after.svg > tamil-haln.svg


## 6 `dist`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-dist-before.svg --features=-dist --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bf4,0b85

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-dist-after.svg --features=+dist --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bf4,0b85

svg_stack.py --direction=h tamil-dist-before.svg right-arrow.svg tamil-dist-after.svg > tamil-dist.svg


## 6 `kern`

> None found.


## 6 `abvm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-abvm-before.svg --features=-abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bb9,0bcd

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-abvm-after.svg --features=+abvm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bb9,0bcd

svg_stack.py --direction=h tamil-abvm-before.svg right-arrow.svg tamil-abvm-after.svg > tamil-abvm.svg


## 6 `blwm`

> Note: Noto Serif Tamil has a `blwm` feature, but it fails to attach
> the included mark (`U+0952`) for unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-blwm-before.svg --features=-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bf7,0952

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tamil-blwm-after.svg --features=+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTamil-Regular.ttf --unicodes=0bf7,0952

svg_stack.py --direction=h tamil-blwm-before.svg right-arrow.svg tamil-blwm-after.svg > tamil-blwm.svg


================================================
FILE: images/telugu/telugu-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-telugu.md](../../opentype-shaping-telugu.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


> Note: always use `--features=-init` in examples where the `init`
> feature itself is not being explained.


## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-matra-decompose-before.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c48

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-matra-decompose-after.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c46,25cc,0c56

montage telugu-matra-decompose-before.png right-arrow.png telugu-matra-decompose-after.png -geometry +0+0 -background transparent telugu-matra-decompose.png


## 3.3 `akhn`

### KSsa

> Note: Noto Serif Telugu implements this as a `pres`+`blwf`
> substitution for unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-akhn-kssa-before.png --features=-blwf,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c15,25cc,0c4d,0c37

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-akhn-kssa-after.png --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c15,0c4d,0c37

montage telugu-akhn-kssa-before.png right-arrow.png telugu-akhn-kssa-after.png -geometry +0+0 -background transparent telugu-akhn-kssa.png

### JNya

> None found. Microsoft docs reference a "SsJa" akhand form, which is
> also not found.


## 3.4 `rphf`

> None found. 


## 3.7 `blwf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-blwf-before.png --features=-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c17,25cc,0c4d,0c24

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-blwf-after.png --features=+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c17,0c4d,0c24

montage telugu-blwf-before.png right-arrow.png telugu-blwf-after.png -geometry +0+0 -background transparent telugu-blwf.png


## 3.9 `half`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-half-before.png --features=-half,-haln --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTelugu-Regular.ttf --unicodes=0c22,25cc,0c4d,200d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-half-after.png --features=+half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTelugu-Regular.ttf --unicodes=0c22,0c4d,200d

montage telugu-half-before.png right-arrow.png telugu-half-after.png -geometry +0+0 -background transparent telugu-half.png


## 3.10 `pstf`

> None found.


## 3.12 `cjct`

> None found.


## 4.2 Pre-base matras

> Not applicable.


## 4.3 Reph position

> No examples found; existing fonts seem not to incorporate Reph for
> Telugu....


## 5 `pres`

> Note: Example from Noto Serif Telugu, but it looks like it should be
> an `abvs` substitution instead....

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-pres-before.png --features=-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c39,0c4c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-pres-after.png --features=+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c39,0c4c

montage telugu-pres-before.png right-arrow.png telugu-pres-after.png -geometry +0+0 -background transparent telugu-pres.png


## 5 `abvs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-abvs-before.png --features=-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTelugu-Regular.ttf --unicodes=0c16,0c40

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-abvs-after.png --features=+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTelugu-Regular.ttf --unicodes=0c16,0c40

montage telugu-abvs-before.png right-arrow.png telugu-abvs-after.png -geometry +0+0 -background transparent telugu-abvs.png


## 5 `blws`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-blws-before.png --features=-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTelugu-Regular.ttf --unicodes=0c16,0c46,0c56

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-blws-after.png --features=+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTelugu-Regular.ttf --unicodes=0c16,0c46,0c56

montage telugu-blws-before.png right-arrow.png telugu-blws-after.png -geometry +0+0 -background transparent telugu-blws.png


## 5 `psts`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-psts-before.png --features=-psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTelugu-Regular.ttf --unicodes=0c2b,0c42

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-psts-after.png --features=+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTelugu-Regular.ttf --unicodes=0c2b,0c42

montage telugu-psts-before.png right-arrow.png telugu-psts-after.png -geometry +0+0 -background transparent telugu-psts.png


## 5 `haln`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-haln-before.png --features=-haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c2f,0c4d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-haln-after.png --features=+haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c2f,0c4d

montage telugu-haln-before.png right-arrow.png telugu-haln-after.png -geometry +0+0 -background transparent telugu-haln.png


## `abvm`

> None found.


## `blwm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-blwm-before.png --features=-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c1d,0c62

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-blwm-after.png --features=+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c1d,0c62

montage telugu-blwm-before.png right-arrow.png telugu-blwm-after.png -geometry +0+0 -background transparent telugu-blwm.png


================================================
FILE: images/telugu/telugu-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-telugu.md](../../opentype-shaping-telugu.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192


> Note: always use `--features=-init` in examples where the `init`
> feature itself is not being explained.


## 2.2 Matra decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-matra-decompose-before.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c48

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-matra-decompose-after.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c46,25cc,0c56

svg_stack.py --direction=h telugu-matra-decompose-before.svg right-arrow.svg telugu-matra-decompose-after.svg > telugu-matra-decompose.svg


## 3.2 `nukt`

> Note: Noto Serif Telugu implements this in a `blwm` feature.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-nukt-before.svg --features=-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c18,0c3c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-nukt-after.svg --features=+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c19,0c3c

svg_stack.py --direction=h telugu-nukt-before.svg right-arrow.svg telugu-nukt-after.svg > telugu-nukt.svg


## 3.3 `akhn`

### KSsa

> Note: Noto Serif Telugu implements this as a `pres`+`blwf`
> substitution for unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-akhn-kssa-before.svg --features=-blwf,-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c15,25cc,0c4d,0c37

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-akhn-kssa-after.svg --features= --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c15,0c4d,0c37

svg_stack.py --direction=h telugu-akhn-kssa-before.svg right-arrow.svg telugu-akhn-kssa-after.svg > telugu-akhn-kssa.svg

### JNya

> None found. Microsoft docs reference a "SsJa" akhand form, which is
> also not found.


## 3.4 `rphf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-rphf-before.svg --features=-rphf,-haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --preserve-default-ignorables --unicodes=0c30,0c4d,200d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-rphf-after.svg --features=+rphf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c30,0c4d,200d

svg_stack.py --direction=h telugu-rphf-before.svg right-arrow.svg telugu-rphf-after.svg > telugu-rphf.svg


## 3.7 `blwf`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-blwf-before.svg --features=-blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c17,25cc,0c4d,0c24

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-blwf-after.svg --features=+blwf --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c17,0c4d,0c24

svg_stack.py --direction=h telugu-blwf-before.svg right-arrow.svg telugu-blwf-after.svg > telugu-blwf.svg


## 3.9 `half`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-half-before.svg --features=-half,-haln --preserve-default-ignorables --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTelugu-Regular.ttf --unicodes=0c22,25cc,0c4d,200d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-half-after.svg --features=+half --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTelugu-Regular.ttf --unicodes=0c22,0c4d,200d

svg_stack.py --direction=h telugu-half-before.svg right-arrow.svg telugu-half-after.svg > telugu-half.svg


## 3.10 `pstf`

> None found.


## 3.12 `cjct`

> None found.


## 4.2 Pre-base matras

> Not applicable.


## 4.3 Reph position

> No examples found; existing fonts seem not to incorporate Reph for
> Telugu....


## 5 `pres`

> Note: Example from Noto Serif Telugu, but it looks like it should be
> an `abvs` substitution instead....

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-pres-before.svg --features=-pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c39,0c4c

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-pres-after.svg --features=+pres --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c39,0c4c

svg_stack.py --direction=h telugu-pres-before.svg right-arrow.svg telugu-pres-after.svg > telugu-pres.svg


## 5 `abvs`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-abvs-before.svg --features=-abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c16,0c40

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-abvs-after.svg --features=+abvs --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c16,0c40

svg_stack.py --direction=h telugu-abvs-before.svg right-arrow.svg telugu-abvs-after.svg > telugu-abvs.svg


## 5 `blws`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-blws-before.svg --features=-blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c16,0c4d,0c24,0c4d,0c30

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-blws-after.svg --features=+blws --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c16,0c4d,0c24,0c4d,0c30

svg_stack.py --direction=h telugu-blws-before.svg right-arrow.svg telugu-blws-after.svg > telugu-blws.svg


## 5 `psts`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-psts-before.svg --features=-psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c2b,0c42

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-psts-after.svg --features=+psts --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c2b,0c42

svg_stack.py --direction=h telugu-psts-before.svg right-arrow.svg telugu-psts-after.svg > telugu-psts.svg


## 5 `haln`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-haln-before.svg --features=-haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c2f,0c4d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-haln-after.svg --features=+haln --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c2f,0c4d

svg_stack.py --direction=h telugu-haln-before.svg right-arrow.svg telugu-haln-after.svg > telugu-haln.svg


## `dist`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-dist-before.svg --features=-dist --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c19,0c44

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-dist-after.svg --features=+dist --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c19,0c44

svg_stack.py --direction=h telugu-dist-before.svg right-arrow.svg telugu-dist-after.svg > telugu-dist.svg


## `abvm`

> Note: Noto Serif Telugu implements this in a `blwm` feature, for
> unknown reasons.

hb-view --font-size=110 --margin=32,16,2,16 --output-file=telugu-abvm-before.svg --features=-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c0f,0c00

hb-view --font-size=110 --margin=32,16,2,16 --output-file=telugu-abvm-after.svg --features=+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c0f,0c00

svg_stack.py --direction=h telugu-abvm-before.svg right-arrow.svg telugu-abvm-after.svg > telugu-abvm.svg


## `blwm`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-blwm-before.svg --features=-blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c1d,0c4d,0c26

hb-view --font-size=110 --margin=2,16,2,16 --output-file=telugu-blwm-after.svg --features=+blwm --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTelugu-Regular.ttf --unicodes=0c1d,0c4d,0c26

svg_stack.py --direction=h telugu-blwm-before.svg right-arrow.svg telugu-blwm-after.svg > telugu-blwm.svg


================================================
FILE: images/thai-lao/thai-lao-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-thai-lao.md](../../opentype-shaping-thai-lao.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## 1.1 `ccmp`

hb-view --font-size=110 --margin=16,16,2,16 --output-file=thai-ccmp-before.png --features=-ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=25cc,0e4a,0e4d

hb-view --font-size=110 --margin=16,16,2,16 --output-file=thai-ccmp-after.png --features=+ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=25cc,0e4a,0e4d

montage thai-ccmp-before.png right-arrow.png thai-ccmp-after.png -geometry +0+0 -background transparent thai-ccmp.png

## 1.2 Decomposition

## 1.2 Am sign decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=lao-am-decomposition-before.png --features=-ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifLao-Regular.ttf --unicodes=25cc,0eb3

hb-view --font-size=110 --margin=2,16,2,16 --output-file=lao-am-decomposition-after.png --features=+ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifLao-Regular.ttf --unicodes=25cc,0ecd,25cc,0eb2

montage lao-am-decomposition-before.png right-arrow.png lao-am-decomposition-after.png -geometry +0+0 -background transparent lao-am-decomposition.png

## 4 `kern`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=lao-kern-before.png --features=-kern --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifLao-Regular.ttf --unicodes=0ec1,0e9a

hb-view --font-size=110 --margin=2,16,2,16 --output-file=lao-kern-after.png --features=+kern --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifLao-Regular.ttf --unicodes=0ec1,0e9a

montage lao-kern-before.png right-arrow.png lao-kern-after.png -geometry +0+0 -background transparent lao-kern.png

## 4 `mark`

hb-view --font-size=110 --margin=16,16,2,16 --output-file=thai-mark-before.png --features=-mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=0e0e,25cc,0e38

hb-view --font-size=110 --margin=16,16,2,16 --output-file=thai-mark-after.png --features=+mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=0e0e,0e38

montage thai-mark-before.png right-arrow.png thai-mark-after.png -geometry +0+0 -background transparent thai-mark.png


## 4 `mkmk`

hb-view --font-size=110 --margin=16,16,2,16 --output-file=thai-mkmk-before.png --features=-mkmk --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=25cc,0e31,0e48

hb-view --font-size=110 --margin=16,16,2,16 --output-file=thai-mkmk-after.png --features=+mkmk --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=25cc,0e31,0e48

montage thai-mkmk-before.png right-arrow.png thai-mkmk-after.png -geometry +0+0 -background transparent thai-mkmk.png


## PUA 1 - Sara Am decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=thai-am-decomposition-before.png --features=-ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=25cc,0e33

hb-view --font-size=110 --margin=2,16,2,16 --output-file=thai-am-decomposition-after.png --features=+ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=25cc,0e4d,25cc,0e32

montage thai-am-decomposition-before.png right-arrow.png thai-am-decomposition-after.png -geometry +0+0 -background transparent thai-am-decomposition.png


================================================
FILE: images/thai-lao/thai-lao-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-thai-lao.md](../../opentype-shaping-thai-lao.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## 1.1 `ccmp`

hb-view --font-size=110 --margin=16,16,2,16 --output-file=thai-ccmp-before.svg --features=-ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=25cc,0e4a,0e4d

hb-view --font-size=110 --margin=16,16,2,16 --output-file=thai-ccmp-after.svg --features=+ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=25cc,0e4a,0e4d

svg_stack --direction=h thai-ccmp-before.svg right-arrow.svg thai-ccmp-after.svg > thai-ccmp.svg

## 1.2 Decomposition

## 1.2 Am sign decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=lao-am-decomposition-before.svg --features=-ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifLao-Regular.ttf --unicodes=25cc,0eb3

hb-view --font-size=110 --margin=2,16,2,16 --output-file=lao-am-decomposition-after.svg --features=+ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifLao-Regular.ttf --unicodes=25cc,0ecd,25cc,0eb2

svg_stack --direction=h lao-am-decomposition-before.svg right-arrow.svg lao-am-decomposition-after.svg > lao-am-decomposition.svg

## 4 `kern`

hb-view --font-size=110 --margin=2,16,2,16 --output-file=lao-kern-before.svg --features=-kern --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifLao-Regular.ttf --unicodes=0ec1,0e9a

hb-view --font-size=110 --margin=2,16,2,16 --output-file=lao-kern-after.svg --features=+kern --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifLao-Regular.ttf --unicodes=0ec1,0e9a

svg_stack --direction=h lao-kern-before.svg right-arrow.svg lao-kern-after.svg > lao-kern.svg

## 4 `mark`

hb-view --font-size=110 --margin=16,16,2,16 --output-file=thai-mark-before.svg --features=-mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=0e0e,25cc,0e38

hb-view --font-size=110 --margin=16,16,2,16 --output-file=thai-mark-after.svg --features=+mark --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=0e0e,0e38

svg_stack --direction=h thai-mark-before.svg right-arrow.svg thai-mark-after.svg > thai-mark.svg


## 4 `mkmk`

hb-view --font-size=110 --margin=16,16,2,16 --output-file=thai-mkmk-before.svg --features=-mkmk --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=25cc,0e31,0e48

hb-view --font-size=110 --margin=16,16,2,16 --output-file=thai-mkmk-after.svg --features=+mkmk --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=25cc,0e31,0e48

svg_stack --direction=h thai-mkmk-before.svg right-arrow.svg thai-mkmk-after.svg > thai-mkmk.svg


## PUA 1 - Sara Am decomposition

hb-view --font-size=110 --margin=2,16,2,16 --output-file=thai-am-decomposition-before.svg --features=-ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=25cc,0e33

hb-view --font-size=110 --margin=2,16,2,16 --output-file=thai-am-decomposition-after.svg --features=+ccmp --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifThai-Regular.ttf --unicodes=25cc,0e4d,25cc,0e32

svg_stack --direction=h thai-am-decomposition-before.svg right-arrow.svg thai-am-decomposition-after.svg > thai-am-decomposition.svg


================================================
FILE: images/tibetan/tibetan-png-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-tibetan.md](../../opentype-shaping-tibetan.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.png --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## Syllable identification

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-syllable.png --features=  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f51,0f54,0f7a,0f0b,0f56,0f66,0f90,0fb2,0f74,0f53


## 1.2 ccmp

hb-view --font-size=110 --margin=2,16,2,72 --output-file=tibetan-ccmp-before.png --features=-ccmp  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f77

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-ccmp-after.png --features=+ccmp  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0fb2,00a0,00a0,0f71,25cc,0f80

montage tibetan-ccmp-before.png right-arrow.png tibetan-ccmp-after.png -geometry +0+0 -background transparent tibetan-ccmp.png


## 2.1 abvs

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-abvs-before.png --features=-abvs  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f49,0f7b,0f7e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-abvs-after.png --features=+abvs  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f49,0f7b,0f7e

montage tibetan-abvs-before.png right-arrow.png tibetan-abvs-after.png -geometry +0+0 -background transparent tibetan-abvs.png


## 2.2 blws

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-blws-before.png --features=-blws  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f4a,0f91

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-blws-after.png --features=+blws  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f4a,0f91

montage tibetan-blws-before.png right-arrow.png tibetan-blws-after.png -geometry +0+0 -background transparent tibetan-blws.png


## 2.3 calt

> Note: Noto Sans Tibetan calls this substitution twice, in calt and
> in abvs.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-calt-before.png --features=-calt,-abvs  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f59,0f7d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-calt-after.png --features=+calt  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f59,0f7d

montage tibetan-calt-before.png right-arrow.png tibetan-calt-after.png -geometry +0+0 -background transparent tibetan-calt.png


## 2.4 liga

> Note: Noto Sans Tibetan implements this as a ccmp substitution for
> unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-liga-before.png --features=-ccmp  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f97,0f39,0fb7

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-liga-after.png --features=+ccmp  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f97,0f39,0fb7

montage tibetan-liga-before.png right-arrow.png tibetan-liga-after.png -geometry +0+0 -background transparent tibetan-liga.png


## 3 kern

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-kern-before.png --features=-kern  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f65,0f0b,0f62,0fa9

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-kern-after.png --features=+kern  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f65,0f0b,0f62,0fa9

montage tibetan-kern-before.png right-arrow.png tibetan-kern-after.png -geometry +0+0 -background transparent tibetan-kern.png


## 3 abvm

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-abvm-before.png --features=-abvm  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f61,0f80,0f7e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-abvm-after.png --features=+abvm  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f61,0f80,0f7e

montage tibetan-abvm-before.png right-arrow.png tibetan-abvm-after.png -geometry +0+0 -background transparent tibetan-abvm.png


## 3 blwm

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-blwm-before.png --features=-blwm  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f59,0fb3,0f71,0f74

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-blwm-after.png --features=+blwm  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f59,0fb3,0f71,0f74

montage tibetan-blwm-before.png right-arrow.png tibetan-blwm-after.png -geometry +0+0 -background transparent tibetan-blwm.png


## 3 mkmk

> Note: Noto Sans Tibetan implements this in both blwm and mkmk for
> unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-mkmk-before.png --features=-mkmk,-blwm  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f51,0f71,0f35

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-mkmk-after.png --features=+mkmk  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSerifTibetan-Regular.ttf --unicodes=0f51,0f71,0f35

montage tibetan-mkmk-before.png right-arrow.png tibetan-mkmk-after.png -geometry +0+0 -background transparent tibetan-mkmk.png


================================================
FILE: images/tibetan/tibetan-svg-image-generation-log.md
================================================
# Commands used to generate the images in [opentype-shaping-tibetan.md](../../opentype-shaping-tibetan.md)

## Arrow general

hb-view --font-size=110 --output-file=right-arrow.svg --background=FFFFFF00 --margin=0,0,0,0 /usr/share/fonts/opentype/gentiumplus/GentiumPlus-R.ttf --unicodes=2192

## Syllable identification

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-syllable.svg --features=  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f51,0f54,0f7a,0f0b,0f56,0f66,0f90,0fb2,0f74,0f53


## 1.2 ccmp

hb-view --font-size=110 --margin=2,16,2,72 --output-file=tibetan-ccmp-before.svg --features=-ccmp  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f77

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-ccmp-after.svg --features=+ccmp  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0fb2,00a0,00a0,0f71,25cc,0f80

svg_stack --direction=h tibetan-ccmp-before.svg right-arrow.svg tibetan-ccmp-after.svg > tibetan-ccmp.svg


## 2.1 abvs

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-abvs-before.svg --features=-abvs  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f49,0f7b,0f7e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-abvs-after.svg --features=+abvs  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f49,0f7b,0f7e

svg_stack --direction=h tibetan-abvs-before.svg right-arrow.svg tibetan-abvs-after.svg > tibetan-abvs.svg


## 2.2 blws

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-blws-before.svg --features=-blws  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f4a,0f91

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-blws-after.svg --features=+blws  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f4a,0f91

svg_stack --direction=h tibetan-blws-before.svg right-arrow.svg tibetan-blws-after.svg > tibetan-blws.svg


## 2.3 calt

> Note: Noto Sans Tibetan calls this substitution twice, in calt and
> in abvs.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-calt-before.svg --features=-calt,-abvs  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f59,0f7d

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-calt-after.svg --features=+calt  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f59,0f7d

svg_stack --direction=h tibetan-calt-before.svg right-arrow.svg tibetan-calt-after.svg > tibetan-calt.svg


## 2.4 liga

> Note: Noto Sans Tibetan implements this as a ccmp substitution for
> unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-liga-before.svg --features=-ccmp  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f97,0f39,0fb7

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-liga-after.svg --features=+ccmp  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f97,0f39,0fb7

svg_stack --direction=h tibetan-liga-before.svg right-arrow.svg tibetan-liga-after.svg > tibetan-liga.svg


## 3 kern

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-kern-before.svg --features=-kern  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f65,0f0b,0f62,0fa9

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-kern-after.svg --features=+kern  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f65,0f0b,0f62,0fa9

svg_stack --direction=h tibetan-kern-before.svg right-arrow.svg tibetan-kern-after.svg > tibetan-kern.svg


## 3 abvm

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-abvm-before.svg --features=-abvm  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f61,0f80,0f7e

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-abvm-after.svg --features=+abvm  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f61,0f80,0f7e

svg_stack --direction=h tibetan-abvm-before.svg right-arrow.svg tibetan-abvm-after.svg > tibetan-abvm.svg


## 3 blwm

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-blwm-before.svg --features=-blwm  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f59,0fb3,0f71,0f74

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-blwm-after.svg --features=+blwm  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f59,0fb3,0f71,0f74

svg_stack --direction=h tibetan-blwm-before.svg right-arrow.svg tibetan-blwm-after.svg > tibetan-blwm.svg


## 3 mkmk

> Note: Noto Sans Tibetan implements this in both blwm and mkmk for
> unknown reasons.

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-mkmk-before.svg --features=-mkmk,-blwm  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f51,0f71,0f35

hb-view --font-size=110 --margin=2,16,2,16 --output-file=tibetan-mkmk-after.svg --features=+mkmk  --background=FFFFFF00 /usr/share/fonts/truetype/noto/NotoSansTibetan-Regular.ttf --unicodes=0f51,0f71,0f35

svg_stack --direction=h tibetan-mkmk-before.svg right-arrow.svg tibetan-mkmk-after.svg > tibetan-mkmk.svg


================================================
FILE: index.md
================================================
```{include} /_global.md
```

# OpenType shaping documents #

Sponsored by [YesLogic](https://yeslogic.com/) 

_<aside>Thanks also to the developers of HarfBuzz and AllSorts, plus many other font engineers and text-encoding experts for their generosity of time and insightful contributions.</aside>_

:::{admonition} &#127366; &#127344; &#127361; &#127357; &#127352; &#127357; &#127350;
:class: caution
These documents are an active WORK IN PROGRESS.

NONE of the documents you currently see here are complete
nor are they suitable for reference. PLEASE do not use
them as a guide or as a general information source.

As long as this warning text remains visible, the above 
holds true. 
:::


These documents are meant to provide a functional specification for
text shaping. The expectation is that an implementer of this
specification will be using fonts in the OpenType font format applied
to input text that complies with Unicode.

Because application software and end-user documents may utilize
non-OpenType fonts and non-Unicode text (in particular, when older
fonts or documents are encountered), these documents also provide
functional information that a shaping engine may use to implement a
reasonable best-effort attempt at producing useful output in the most
common of such scenarios.


## Shapers

The shaping behavior described here can be roughly divided into five
categories.

All non-complex scripts follow the same
[default](opentype-shaping-default.md) shaping model.


The _Indic Model_ is shared by ten individual scripts. These scripts
follow the same overall approach to shaping, described in the [Indic
general](opentype-shaping-indic-general.md) document, but each script
incorporates script-specific details, which are more fully described
in its own document:

  - [Devanagari](opentype-shaping-devanagari.md)
  - [Bengali](opentype-shaping-bengali.md)
  - [Gujarati](opentype-shaping-gujarati.md)
  - [Gurmukhi](opentype-shaping-gurmukhi.md)
  - [Kannada](opentype-shaping-kannada.md)
  - [Malayalam](opentype-shaping-malayalam.md)
  - [Oriya](opentype-shaping-oriya.md)
  - [Tamil](opentype-shaping-tamil.md)
  - [Telugu](opentype-shaping-telugu.md)
  - [Sinhala](opentype-shaping-sinhala.md)


The _Arabic Model_ is shared by four individual scripts. These scripts
follow the same overall approach to shaping, described in the [Arabic
general](opentype-shaping-arabic-general.md) document, but each script
incorporates script-specific details, which are more fully described
in its own document:

  - [Arabic](opentype-shaping-arabic.md)
  - [N'Ko](opentype-shaping-nko.md)
  - [Syriac](opentype-shaping-syriac.md)
  - [Mongolian](opentype-shaping-mongolian.md)


Five of the remaining scripts each use a distinct, script-specific
model, with two others (Thai and Lao) sharing enough details to be
handled by a common shaper:

  - [Hangul](opentype-shaping-hangul.md)
  - [Hebrew](opentype-shaping-hebrew.md)
  - [Khmer](opentype-shaping-khmer.md)
  - [Thai and Lao](opentype-shaping-thai-lao.md)
  - [Tibetan](opentype-shaping-tibetan.md)
  - [Myanmar](opentype-shaping-myanmar.md)
  

Finally, the Universal Shaping Engine (<abbr title="Universal Shaping
Engine">USE</abbr>) model is designed to shape all
complex scripts that are not handled by a dedicated
script-specific shaping model in the lists above:

  - [Universal Shaping Engine (<abbr>USE</abbr>)](opentype-shaping-use.md)


In addition, these documents describe the handling of emoji
sequences. Although emoji sequences do not constitute a separate
shaping model, handling emoji sequences can incorporate many of the
same shaping mechanisms and shaping engine implementations may be
expected to handle them:

  - [Emoji](opentype-shaping-emoji.md)
  

Shaping is just one part of the overall text-handling process. These
documents assume that other components in the software stack will be
responsible for details such as handling higher-level markup, layout,
font matching and loading, rasterization, and so on. Most importantly,
these documents assume that the input text has already been segmented
into text runs that consist of a single language, script, font, and
all other markup considerations (such as size or color, for example).

Within those assumptions, the shaping of a particular text run should
be consistent, regardless of whether the higher-level processes
involve a document, user-interface element, network stream, or any
other context for displaying text.


## Normalization

However, these documents also include a description of text
[normalization](opentype-shaping-normalization.md) in the OpenType
shaping context, which differs from Unicode normalization in several
respects. Shaping engine implementations may differ as to whether the
shaping engine itself is responsible for handling normalization or
whether normalization is handled by another component
in the stack. 


## Additional information

Various practical [notes](notes/index.md) about this document set and
the details of its scope, limitations, and quirks are also provided.

Some [errata](errata.md) about the "upstream" specifications and
reference documents are noted separately. 

In its final form, this repository will hold documentation describing
the shaping behavior used for layout of OpenType text. In particular,
it will focus on complex scripts.

In addition to the primary, per-script documents, implementers and
other interested readers are encouraged to check the
[character tables](character-tables/index.md) for correctness and to
examine the [image-generation logs](https://github.com/n8willis/opentype-shaping-documents/blob/master/images/README.md) to identify
issues seen in the inline images.


## Feedback

Interested readers, font developers, and shaping-engine implementers
are encouraged to provide feedback, ask questions, and propose
improvements to any part of these documents. Shaping is the concern of
software developers and readers across the world, and all are welcome
to participate in recording and clarifying what is required to produce
the best and most accurate text output possible, both now and in the
future.

See the upstream git repository at
[github.com/n8willis/opentype-shaping-documents](https://github.com/n8willis/opentype-shaping-documents)
to raise issues, ask questions, or add comments.


## References

These documents cite the following informative references:

1. The Microsoft [Script development
   specifications](https://docs.microsoft.com/en-us/typography/script-development/standard),
   which document the behaviors expected for OpenType Layout fonts and
   provide guidance &amp; examples for type designers. OpenType is a
   registered trademark of Microsoft Corporation. 
2. Related portions of the Microsoft OpenType specification, such as the
   [OpenType Layout tag
   registry](https://docs.microsoft.com/en-us/typography/opentype/spec/ttoreg)
   and [OpenType Layout common table
   formats](https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2),
   which list and define feature tags, script &amp; language tags, and
   other internals of compliant OpenType font binaries. OpenType is a
   registered trademark of Microsoft Corporation. 
3. The [HarfBuzz](https://github.com/harfbuzz/harfbuzz) project, which
   includes a free-software/open-source implementation of OpenType
   Layout shaping with full source code and documentation. 
4. The [AllSorts](https://github.com/yeslogic/allsorts) project, which
   includes a free-software/open-source implementation of OpenType
   Layout shaping with full source code and documentation.
5. The [Unicode
   Standard](http://www.unicode.org/standard/standard.html) and
   related Unicode Consortium projects such as the [Unicode Character
   Database](http://www.unicode.org/reports/tr44/), which defines
   Unicode code points and formal character properties used in
   shaping. Unicode and the Unicode Logo are registered trademarks of
   Unicode, Inc. in the United States and other countries.
6. The YesLogic [text corpus](https://github.com/yeslogic/corpus),
   which includes real-world text data for several Indic scripts,
   scraped from Wikipedia, Reddit, and multiple online news
   sources. This data is used to test shaping in AllSorts and Prince.
7. Known but unofficial information about other shaping-engine
   projects. Primarily this includes tests and reproducible issues
   found via [HarfBuzz](https://github.com/harfbuzz/harfbuzz), because
   HarfBuzz intentionally aims to produce results that will 100% match
   the output of Microsoft Uniscribe (not counting cases where
   Uniscribe's output is known to be incorrect, of course).
   > Note: occasionally, tests or issues documenting the behavior of
   > Apple CoreText are also included, but CoreText compatibility is
   > not an explicit goal for HarfBuzz.
   

---
Version {{ env.config.version }}, release {{ env.config.release }};
built {sub-ref}`today`.


================================================
FILE: make.bat
================================================
@ECHO OFF

REM Run everything below from the directory that contains this script, so
REM the relative SOURCEDIR and BUILDDIR values resolve the same way no matter
REM where the script is invoked from. Every exit path undoes this with popd.
pushd %~dp0

REM Command file for Sphinx documentation
REM
REM Usage: make.bat [target]
REM   The target is handed to "sphinx-build -M". With no target, the
REM   sphinx-build help listing is printed instead.
REM Environment variables read:
REM   SPHINXBUILD    command used to run Sphinx; defaults to sphinx-build
REM   SPHINXOPTS, O  appended verbatim to the sphinx-build command line

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

REM Probe run with all output discarded: only the resulting errorlevel is
REM of interest, to detect that the command could not be found.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	REM Restore the caller's working directory before bailing out, otherwise
	REM the pushd at the top of the script is left on the directory stack.
	popd
	exit /b 1
)

REM No target given: fall through to the help listing.
if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd


================================================
FILE: notes/README.md
================================================
# Notes #

The files in this directory include auxiliary information that is either
tangential to the main shaping-behavior documentation or is
long enough that trying to include it inline would disrupt
the flow of the text for readers.

Notes included cover:

  - [Uniscribe compatibility](/notes/uniscribe-bug-compatibility.md):
    Information on preserving strict compatibility with Microsoft's
    Uniscribe shaping engine
  - [Ragel state-machine operators](/notes/ragel-machine-notation.md):
    Information on the syntax of the
    [Ragel](http://www.colm.net/open-source/ragel/) state-machine
    compiler, which is the reference regular-expression syntax used
    when listing regular expressions in the shaping-behavior
    documentation, but which itself is not mandatory.
  - [Emoji implementation](/notes/emoji-implementation.md): Information
    on the image formats, codepoint visibility, and <abbr title="Glyph Substitution table">GSUB</abbr>/<abbr title="Glyph Positioning table">GPOS</abbr> features
    used in real-world Emoji fonts distributed by major vendors.


================================================
FILE: notes/emoji-implementation.md
================================================
# Notes on Emoji font implementation #

This document notes details on how common Emoji fonts implement
sequence, modifier, variation-selection, text-presentation
fallback, and other behavior, for the purposes of testing and
debugging.

Emoji fonts are deployed by vendors using a variety of different
image formats (including the `SVG `, `COLR`v0/`CPAL`,
`COLR`v1/`CPAL`, `glyf`, and `CFF ` vector formats and the `CBDT`
and `sbix` raster formats), which can make it difficult to
characterize Emoji font behavior.

Similarly, Emoji font vendors have employed a variety of
different OpenType features to implement support for standard
sequences, modifier-based sequences, <abbr title="Zero-Width Joiner">ZWJ</abbr>-based sequences and
permutations.

See the [Emoji shaping document](../opentype-shaping-emoji.md)
for more details on the sequences and definitions involved.

## Format, features, and control-codepoint visibility table ##

This table lists the image format, the <abbr title="Glyph Substitution table">GSUB</abbr> feature(s) used for
basic Emoji sequence support and <abbr title="Zero-Width Joiner">ZWJ</abbr>-based sequence support, and
whether or not the font includes a visible glyph for the
presentation selector codepoints (VS15, `U+FE0E`; VS16, `U+FE0F`)
and modifier codepoints (`U+1F3FB`..`U+1F3FF`).


:::{table} Emoji sequence implementation details

| Font                   | publisher | image format | sequence formation feature | ZWJ sequence feature | visible presentation selector | visible modifier |
|:-----------------------|:----------|:-------------|:---------------------------|:---------------------|:------------------------------|:-----------------|
| Source Emoji           | Adobe     | cff          | ccmp                       | ccmp, salt           | YES                           | YES              |
| Blobmoji               | C1710     | CBDT         | ccmp                       | ccmp                 | no                            | YES              |
| Twemoji                | Twitter   | SVG          | liga                       | liga                 | no                            | YES              |
| Noto Color Emoji       | Google    | CBDT         | ccmp                       | ccmp                 | no                            | YES              |
| Noto Color Emoji       | Google    | COLRv1       | ccmp                       | ccmp                 | no                            | YES              |
| EmojiTwo Android       | EmojiTwo  | CBDT         | ccmp                       | ccmp                 | no                            | YES              |
| EmojiTwo Apple         | EmojiTwo  | sbix         | morx                       | morx                 | no                            | YES              |
| EmojiTwo SVG          | EmojiTwo  | SVG          | ccmp                       | ccmp                 | no                            | YES              |
| Openmoji               | HfG Gmünd | SVG          | liga                       | liga                 | no                            | YES              |
| FirefoxEmoji           | Mozilla   | COLRv0       | rlig                       | rlig                 | no                            | no               |
| Noto Emoji             | Google    | glyf         | ccmp                       | ccmp                 | no                            | YES              |
| Old Noto B&amp;W Emoji | Google    | glyf         | ccmp                       | ccmp                 | no                            | no               |
| JoyPixels              | JoyPixels | CBDT         | ccmp                       | ccmp                 | no                            | YES              |
| Apple Color Emoji      | Apple    | sbix         | morx                       | morx                 | no                            | YES              |
| Samsung Color Emoji    | Samsung  | CBDT         | ccmp                       | ccmp                 | no                            | YES              |
| Segoe UI Emoji         | Microsoft| COLRv0       | ccmp                       | ccmp                 | YES                           | YES              |
:::


### Contributing additional data ###

Volunteers or implementers who wish to contribute data for additional
Emoji fonts may need to collect the information themselves by
inspecting font binaries.

Options available include:

1. **FontTools / TTX**
   - Users can run `ttx -l somefontfilename.ttf` (or `.otf` or `.ttc`
     or `.otc`) to get a short list of the tables. The presence of `SVG `,
     `CBDT`, `sbix`, or `COLR` indicates that whichever one of those exists
     is the image format. _If_ none of the above are there but `glyf` or `CFF `
     or `CFF2` _is_ there, then whichever of those three exists is the
     image format (and means it's a black-and-white emoji font, which users
     would probably know beforehand anyway). If there's more than one of
     `SVG `, `CBDT`, `sbix`, or `COLR` present in the same font file, that
     would likely mean unknown behavior; comments on such cases are welcome.
   - Users can run the `layout-features.py somefontfilename.ttf`
     script (which can be found in the `/Snippets/` directory of the
     `FontTools` package source) and it will print out an indented
     list of the <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features
     used. All that matters for the table above is what the script
     reports on the `Feature: ` line. For a typical emoji font there's
     probably only one feature -- but, if there are several, listing
     them is useful.

2. **AllSorts / allsorts-tools**
   - Users can use the `dump` tool from the `allsorts-tools` package
     to run `allsorts dump somefilename.ttf` and get a list of tables plus
     other metadata; the tables are the first output. Same interpretation
     as above.
   - At the moment it sounds like there isn't a single-command option in
     `allsorts` to list <abbr title="Glyph Substitution table">GSUB</abbr>/<abbr title="Glyph Positioning table">GPOS</abbr> features. Corrections are welcome.

3. **GUI font editors**
   - Users can also open up the font file in a font editor and look at what
     it presents. 
   - FontForge:
     - In FontForge, go to Element -> Font Info in the menu to open the
       font-info dialog box. It will show the <abbr title="Glyph Substitution table">GSUB</abbr>/<abbr title="Glyph Positioning table">GPOS</abbr> lookups in the
       "Lookups" tab (left-hand side).
     - FontForge does _not_ just show a convenient list of all the tables.
       However, when users open the font file, the "Warnings" dialog box will
       report if it finds `SVG `, `CBDT`, `sbix`, or `COLR` tables.
       Unfortunately, it will only actually open the font for
       editing/inspection if it finds a `glyf`, `CFF `, or `CFF2` table
       (which a `COLR` font would have) or an `SVG ` table. So users can't
       use it to inspect the features of the other formats.

(Further instructions will be added to this list for other editors if volunteers
can contribute them)

For determining if there's a printable glyph for the selectors/modifiers:
1. **GUI font editors**
   - Users can open up the font in an editor and look at the slots for the
     Unicode codepoints for the presentation selectors (`U+FE0E` and `U+FE0F`)
     and the modifiers (`U+1F3FB` through `U+1F3FF`), if they exist (they might not).
2. **HarfBuzz**
   - Users can run the `hb-view` utility to output glyph contents for specific
     Unicode codepoints, but one might have to try a couple of options, depending
     on the image format. Run `hb-view --preserve-default-ignorables somefontfilename.ttf --unicodes=fe0e`
     to start (for `U+FE0E`). Users may also try adding the `--font-funcs=ot`
     and/or `--shapers=ot` flags to that command if it gives trouble. 


================================================
FILE: notes/index.md
================================================
# Notes #

This section includes auxiliary information that is either
tangential to the main shaping-behavior documentation or is
long enough that trying to include it inline would disrupt
the flow of the text for readers.

Notes included cover:

  - [Uniscribe compatibility](/notes/uniscribe-bug-compatibility.md):
    Information on preserving strict compatibility with Microsoft's
    Uniscribe shaping engine
  - [Ragel state-machine operators](/notes/ragel-machine-notation.md):
    Information on the syntax of the
    [Ragel](http://www.colm.net/open-source/ragel/) state-machine
    compiler, which is the reference regular-expression syntax used
    when listing regular expressions in the shaping-behavior
    documentation, but which itself is not mandatory.
  - [Emoji implementation](/notes/emoji-implementation.md): Information
    on the image formats, codepoint visibility, and <abbr title="Glyph Substitution table">GSUB</abbr>/<abbr title="Glyph Positioning table">GPOS</abbr> features
    used in real-world Emoji fonts distributed by major vendors.


================================================
FILE: notes/ragel-machine-notation.md
================================================
# Ragel State Machine operators #

The following operators are used in the regular expressions cited in
the various shaping-model documents.

```
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


================================================
FILE: notes/uniscribe-bug-compatibility.md
================================================
# Notes for preserving Uniscribe compatibility #

This document details behavior that shaping engines may wish to
implement in order to maintain strict shaping compatibility with
Microsoft's Uniscribe OpenType shaper, including behavior that may be
regarded as bugs by end users.

**Contents**

  - [Indic standalone-syllable dotted circles](#indic-standalone-syllable-dotted-circles)
  - [Indic syllable cluster merging](#indic-syllable-cluster-merging)
  - [Indic fallback Reph reordering](#indic-fallback-reph-reordering)
  - [Kannada legacy treatment of "Ra,Halant,ZWJ"](#kannada-legacy-treatment-of-ra-halant-zwj)
  - [Khmer kerning](#khmer-kerning)
  - [Sinhala matra decomposition](#sinhala-matra-decomposition)
  - [Miscellaneous](#miscellaneous)
      - [Bengali init feature matching](#bengali-init-feature-matching)
      - [Old-model post-base Halant reordering](#old-model-post-base-halant-reordering)
          - [Kannada final double Halants](#kannada-final-double-halants)
      - [Halants and left matras](#halants-and-left-matras)
	  - [Explicit half-forms followed by matras](#explicit-half-forms-followed-by-matras)


Compatibility notes in the "miscellaneous" category deal with
behaviors that are incompletely documented, deal solely with
deprecated script tags, or do not violate known conventions. Thus, the
scenarios with which they deal may not be regarded as bugs.


## Indic standalone-syllable dotted circles ##

In Indic syllables that include a `PLACEHOLDER` or `DOTTED_CIRCLE`
codepoint, if a dotted-circle glyph is the last consonant of the
syllable, Uniscribe ignores the glyph when processing the syllable.

For example, the dotted-circle glyph is not counted as a consonant
when locating the syllable's base consonant. Therefore, the sequence
<samp>"Ra,Halant,Dotted_Circle"</samp> does not trigger Reph formation (which would
result in the sequence <samp>"Reph,Dotted_Circle"</samp>).


## Indic syllable cluster merging ##

Other shaping engines, such as HarfBuzz, track the indivisible
components of a syllable in "clusters". Each individual letter usually
corresponds to a cluster; when two letters ligate or form a conjunct,
their clusters are merged. When a codepoint is decomposed, its
components remain part of the same, original cluster as the
precomposed version. Uniscribe appears to follow this pattern as well.

When shaping Indic text in most scripts, after shaping the entire
syllable, Uniscribe merges all of the clusters of the syllable into a
single, indivisible cluster. 

The exceptions to this behavior occur when Uniscribe is shaping Tamil
and Sinhala. In those cases, the full-syllable cluster merge is not
performed.

> Note: This full-syllable clustering makes it hard for application
> software to position the cursor within the word. It may also have
> other implications for software above the shaping engine in the
> stack.


## Indic fallback Reph reordering ##

When shaping Indic syllables, any one of several Reph-positioning
strategies may be required by the active script. In the event that no
correct position can be determined by the shaping engine for a
syllable, Uniscribe's ultimate fallback behavior is to reorder the
Reph to the end of the syllable.

If the Reph is reordered to the end of the syllable and this final
position happens to occur immediately after a <samp>"Matra,Halant"</samp> sequence,
Uniscribe leaves the Reph in this position.

Other shaping engines, in this situation, will reorder the Reph to a
position immediately before the <samp>"Matra,Halant"</samp> sequence. This allows
for any <abbr title="Glyph Substitution table">GSUB</abbr> substitutions that match <samp>"Reph,Matra"</samp> sequences to be
activated, if any such substitution rules are present in the active
font. 

## Kannada legacy treatment of "Ra,Halant,ZWJ" ##

In the `<knda>` shaping model (which was deprecated in 2005 in favor
of `<knd2>`), the sequence <samp>"Ra,Halant,ZWJ"</samp> was treated as equivalent
to the sequence <samp>"Ra,ZWJ,Halant"</samp>.

## Khmer kerning ##

Uniscribe does not apply the `kern` feature to Khmer text, even if the
active font includes kerning tables for Khmer codepoints.


## Sinhala matra decomposition ##

Sinhala text in OpenType presents two possible methods for
decomposing multi-part matras. 

One is the canonical Unicode decompositions for the matra codepoints,
as is used in most other Indic scripts. This decomposition is usually
performed early in the shaping process.

The second is the `pstf` feature of <abbr title="Glyph Substitution table">GSUB</abbr>, which is defined differently
for Sinhala. In Sinhala, the `pstf` feature replaces multi-part
dependent vowels (matras) with the right-side matra component of the
canonical decomposition. This substitution generally occurs late in
the shaping process.

Uniscribe supports the `pstf` behavior by handling the decomposition
of multi-part dependent vowels differently for Sinhala -- in a sense,
decomposing each matra into its left-side component followed by a
duplicate of the original matra, then substituting the duplicated
matra with the right-side matra component when the `pstf` feature is
applied.

Shaping engines may opt to decompose multi-part dependent
vowels into their canonical Unicode decompositions, as is done in
other scripts, and substitute the decomposed right-side matra
components at that point.
 
Doing so will negate the need to apply the `pstf` substitution.
However, fonts that were engineered to support the
Uniscribe-supported behavior might not include <abbr title="Glyph Positioning table">GPOS</abbr> positioning
rules for the right-side matra components, relying instead on the
`pstf` substitution to provide a suitable replacement.


## Miscellaneous ##


### Bengali `init` feature matching ###

The `init` feature in Bengali is defined in the OpenType specification
as applying to word-initial left-side dependent vowels (matras).
However, Uniscribe specifically applies the feature whenever
the matra is preceded by any character that falls within the following
range in the Unicode `General Category` property:

- `GENERAL_CATEGORY_FORMAT` [Cf]
- `GENERAL_CATEGORY_UNASSIGNED` [Cn]
- `GENERAL_CATEGORY_PRIVATE_USE` [Co]
- `GENERAL_CATEGORY_SURROGATE` [Cs]
- `GENERAL_CATEGORY_LOWERCASE_LETTER` [Ll]
- `GENERAL_CATEGORY_MODIFIER_LETTER` [Lm]
- `GENERAL_CATEGORY_OTHER_LETTER` [Lo]
- `GENERAL_CATEGORY_TITLECASE_LETTER` [Lt]
- `GENERAL_CATEGORY_UPPERCASE_LETTER` [Lu]
- `GENERAL_CATEGORY_SPACING_MARK` [Mc]
- `GENERAL_CATEGORY_ENCLOSING_MARK` [Me]
- `GENERAL_CATEGORY_NON_SPACING_MARK` [Mn]


### Old-model post-base Halant reordering ###

In old-model (Indic1) script tags, Uniscribe treats some
scripts differently when reordering the first post-base <samp>"Halant"</samp>. This
Halant-reordering is done in Indic1 scripts in order to prepare the
syllable for Indic1's different post-base <abbr title="Glyph Substitution table">GSUB</abbr> substitution rules.

For example, the old-model Indic syllable

	Pre-baseC Halant BaseC Halant Post-baseC

would be reordered to

	Pre-baseC Halant BaseC Post-baseC Halant

before features are applied.

In Malayalam, Uniscribe always reorders the first post-base <samp>"Halant"</samp> in
a syllable to the position immediately after the syllable's last consonant.

#### Kannada final double Halants ####

In old-model Kannada (`<knda>`) runs, Uniscribe is known to reorder
the first post-base <samp>"Halant"</samp> only when there is not already a <samp>"Halant"</samp>
after the last consonant.

For example, the old-model Indic syllable

	Pre-baseC Halant BaseC Halant Post-baseC Halant

would _not_ be reordered. 

This behavior is an exception to the general Indic1 post-base <samp>"Halant"</samp>
reordering operation. It is believed to be script-specific and has
only been observed for Kannada text runs. However, there may still be
undiscovered sequences in other Indic1-script text which trigger the
same behavior; implementers targeting full compatibility should
exercise caution.

If the standard post-base <samp>"Halant"</samp> reordering were performed, then the
likely result of the <abbr title="Glyph Substitution table">GSUB</abbr> feature-application phase would be a
sequence of the form <samp>"BaseC,belowbaseC,Halant"</samp> which, in turn, might
trigger mark-attachment issues for correctly positioning the final
<samp>"Halant"</samp>.

This Uniscribe behavior is not documented, however; therefore the only
recommended workaround for maintaining compatibility is to define a
special-case exception for avoiding the creation of final double
<samp>"Halant"</samp>s in `<knda>` text.


### Halants and left matras ###

When reordering left-side matras, if a <samp>"Halant"</samp> occurs immediately
after a left-side matra, Uniscribe does not move the <samp>"Halant"</samp> with the matra.

Generally, marks (including <samp>"Halant"</samp>) are tagged for reordering with
the same positioning tag as the closest non-mark character that the
mark has affinity with. 

In post-base position, where a yet-to-be-reordered left-side matra
would be found, the closest non-mark character with affinity for the
mark might be a post-base consonant. Uniscribe appears to make a check
ensuring that the <samp>"Halant"</samp> after a left-side matra is not tagged for
reordering with the matra.

This check is required for shaping Sinhala, because the `U+0DDA`
multi-part matra decomposes into the sequence <samp>"`U+0DD9`,Halant"</samp>. The
decomposed <samp>"Halant"</samp> should remain where it is, serving as the right-side
matra component.


### Explicit half-forms followed by matras ###

As a general rule, Uniscribe and other shapers insert a dotted-circle
character before a non-spacing mark character (such as a matra in
Indic2-model scripts) when that non-spacing mark character is not
matched with a base character in a permitted syllable. In such
circumstances, the dotted-circle visually serves to communicate to
readers that a base character has not been found, and also
functionally serves as a surrogate base on which the mark character
can be positioned.

However, Uniscribe is known not to insert a dotted-circle before a
matra character when it is preceded by two sequential
explicit-half-form sequences (meaning two consecutive occurrences of
<samp>"_Consonant_,Halant,ZWJ"</samp>) in Indic2 runs.

Therefore, the sequence:

    `_Consonant_,Halant,ZWJ,_matra_`

would be transformed to:

    `_Consonant_,Halant,ZWJ,Dotted-Circle,_matra_`

but the sequence:

    `_Consonant_,Halant,ZWJ,_Consonant_,Halant,ZWJ,_matra_`

would _not_ be transformed with a dotted-circle insertion.

This exception is regarded as a likely bug.


================================================
FILE: opentype-shaping-arabic-general.md
================================================
# Arabic-style shaping in OpenType #

This document details the general shaping procedure shared by Arabic, N'Ko,
Syriac, and Mongolian. 


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Joining properties](#joining-properties)
	  - [Mark classification](#mark-classification)
	  - [Character tables](#character-tables)
  - [The general Arabic-based shaping model](#the-general-arabic-based-shaping-model)
      - [Stage 1: Transient reordering of modifier combining marks](#stage-1-transient-reordering-of-modifier-combining-marks)
      - [Stage 2: Compound character composition and decomposition](#stage-2-compound-character-composition-and-decomposition)
      - [Stage 3: Computing letter joining states](#stage-3-computing-letter-joining-states)
      - [Stage 4: Applying the `stch` feature](#stage-4-applying-the-stch-feature)
      - [Stage 5: Applying the language-form substitution features from <abbr>GSUB</abbr>](#stage-5-applying-the-language-form-substitution-features-from-gsub)
      - [Stage 6: Applying the typographic-form substitution features from <abbr>GSUB</abbr>](#stage-6-applying-the-typographic-form-substitution-features-from-gsub)
      - [Stage 7: Applying the positioning features from <abbr>GPOS</abbr>](#stage-7-applying-the-positioning-features-from-gpos)
  

## General information ##

Several scripts can be supported by the general OpenType shaping model
used for Arabic. These writing systems observe similar rules and
conventions, even if they are not historically related to
Arabic. Therefore, OpenType defines many of the same <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr>
features for use with each of the corresponding script tags. These scripts include:

  - [Arabic](opentype-shaping-arabic.md)
  - [N'Ko](opentype-shaping-nko.md)
  - [Syriac](opentype-shaping-syriac.md)
  - [Mongolian](opentype-shaping-mongolian.md)

The information found below is intended to serve as a general guide;
script-specific information can be found in the linked document for
each script.

Each of these writing systems uses a joining script that uses
inter-word spaces. Therefore, each codepoint in a text run may be
substituted with one of several contextual forms corresponding to
what, if any, characters appear before and after the codepoint. Most,
but not all, letter sequences join; shaping engines must track which
positions trigger joining behavior for each letter. 

Arabic, N'Ko, and Syriac are written (and, therefore, rendered) from right to
left. Mongolian is written vertically, from top to bottom. Shaping
engines must track the directionality of the text run when scripts of
different direction are mixed.

## Terminology ##

OpenType shaping uses a standard set of terms for elements of the
supported scripts. The terms used colloquially in any particular language
may vary, however, potentially causing confusion.

**Base** glyph or character is the standard term for a
character that is capable of taking a diacritical mark. 

**Kashida** (or **tatweel**) is the term for a glyph inserted into a
sequence for the purpose of elongating the baseline stroke of a
letter. Unicode documents use the term "tatweel" most frequently,
while OpenType documents use the term "kashida" most
frequently. Kashidas are typically inserted in order to justify lines
of text. 


## Glyph classification ##

In joining (or cursive) scripts, proper shaping of
text runs involves identifying the joining behavior of each character,
then combining that information with any preceding or subsequent
characters to determine the contextually correct form for display.

### Joining properties ###

Characters are assigned a `JOINING_TYPE` property in the
Unicode standard that indicates how they join to adjacent
characters. There are six possible values: 

  - `JOINING_TYPE_LEFT` indicates that a character joins with
    the subsequent character, but does not join with the preceding
    character. 
	
  - `JOINING_TYPE_RIGHT` indicates that a character joins with the
    preceding character, but does not join with the subsequent character.	

  - `JOINING_TYPE_DUAL` indicates that a character joins with the
    preceding character and joins with the subsequent character.
	
  - `JOINING_TYPE_NON_JOINING` indicates that a character does not
    join with the preceding or with the subsequent character.
	
  - `JOINING_TYPE_TRANSPARENT` indicates that the character does not
    join with adjacent characters _and_ that the character must be
    skipped over when the shaping engine is evaluating the joining
    positions in a sequence of characters. When a
    `JOINING_TYPE_TRANSPARENT` character is encountered in a sequence,
    the `JOINING_TYPE` of the preceding character passes
    through. Diacritical marks are frequently assigned this value. 
	
  - `JOINING_TYPE_JOIN_CAUSING` indicates that the character forces
    the use of joining forms with the preceding and subsequent
    characters. Kashidas and the Zero Width Joiner (`U+200D`) are both
    `JOIN_CAUSING` characters.
  

In some scripts (such as Arabic and Syriac), letters are also assigned
to a `JOINING_GROUP` that indicates which fundamental character they
behave like with regard to joining behavior. Each of the basic letters
in the script typically belongs to its own `JOINING_GROUP`, while
supplemental and accented letters are usually assigned to the
`JOINING_GROUP` that corresponds to the underlying base letter, with
no diacritics or other marks. 

For example, the Persian letter "Peh" (`U+067E`) is visually
represented as the Arabic letter "Beh" (`U+0628`), but with two additional
below-base "ijam" marks. Consequently, "Peh" is assigned to the `BEH`
`JOINING_GROUP`.

Mongolian and N'Ko, notably, do not make use of joining groups. Every
letter in these scripts belongs to the _null_ or `NO_JOINING_GROUP`
group.


### Mark classification ###

The Unicode standard defines a _canonical combining class_ for each
codepoint that is used whenever a sequence needs to be sorted into
canonical order. 

The marks in most scripts belong to the standard combining
classes. For example:

:::{table} Example mark-classification table

| Codepoint | Combining class | Glyph                              |
|:----------|:----------------|:-----------------------------------|
|`U+064B`   | 27              | &#x064B; Fathatan / Open fathatan  |
|`U+064C`   | 28              | &#x064C; Dammatan / Open dammatan  |
|`U+064D`   | 29              | &#x064D; Kasratan / Open Kasratan  |
|`U+064E`   | 30              | &#x064E; Fatha / Small fatha       |
|`U+064F`   | 31              | &#x064F; Damma / Small damma       |
|`U+0650`   | 32              | &#x0650; Kasra / Small kasra       |
|`U+0651`   | 33              | &#x0651; Shadda                    |
|`U+0652`   | 34              | &#x0652; Sukun                     |
|`U+0670`   | 35              | &#x0670; Superscript Alef          |
|           | 220             | Other below-base combining marks   |
|           | 230             | Other above-base combining marks   |
:::


The numeric values of these combining classes are used during Unicode
normalization. Sequences of marks are sorted by combining class,
reordering the sequence into increasing numerical order.

In addition, some Arabic and Syriac marks require special handling
when shaping Arabic text, during the mark-reordering stage. These
marks fall into two classes of _Modifier Combining Marks_ (<abbr>MCM</abbr>) that
may need to be repositioned closer to the base character, when they
occur in sequences of multiple marks. 

The sets are:
  - Below-base (class 220) <abbr title="Modifier Combining Mark">MCM</abbr>s
  - Above-base (class 230) <abbr title="Modifier Combining Mark">MCM</abbr>s
  
These classifications are used in the [mark-transient-reordering
stage](#stage-1-transient-reordering-of-modifier-combining-marks).

Lists of the marks that belong to each <abbr title="Modifier Combining Mark">MCM</abbr> class are included in the
script-specific shaping documents for Arabic and Syriac.
			
			
### Character tables ###

Character tables for all of the scripts, plus important miscellaneous
characters, are available here: 

  - [Arabic](character-tables/character-tables-arabic.md#arabic-character-table)
  - [Syriac](character-tables/character-tables-syriac.md#syriac-character-table)
  - [N'Ko](character-tables/character-tables-nko.md#nko-character-table)
  - [Mongolian](character-tables/character-tables-mongolian.md#mongolian-character-table)


## The general Arabic-based shaping model ##

Processing a run of text tagged with any of the scripts supported by
the general Arabic shaping model involves seven top-level stages:

1. Transient reordering of modifier combining marks
2. Compound character composition and decomposition
3. Computing letter joining states
4. Applying the `stch` feature
5. Applying the language-form substitution features from <abbr>GSUB</abbr>
6. Applying the typographic-form substitution features from <abbr>GSUB</abbr>
7. Applying the positioning features from <abbr>GPOS</abbr>


### Stage 1: Transient reordering of modifier combining marks ###

<!--- http://www.unicode.org/reports/tr53/tr53-1.pdf --->
> Note: the transient reordering of modifier combining marks is
> necessary only for scripts that can feature the <samp>"Shadda"</samp> mark or
> marks that belong to _Modifier Combining Marks_ (<abbr>MCM</abbr>) classes.

Sequences of adjacent marks must be reordered so that they appear in
the appropriate visual order before the mark-to-base and mark-to-mark
positioning features from <abbr title="Glyph Positioning table">GPOS</abbr> can be correctly applied.

In particular, those marks that have strong affinity to the base
character must be placed closest to the base.

This mark-reordering operation is distinct from the standard,
cross-script mark-reordering performed during Unicode
normalization. The standard Unicode mark-reordering algorithm is based
on comparing the _Canonical_Combining_Class_ (<abbr>Ccc</abbr>) properties of mark
codepoints, whereas this script-specific reordering utilizes the
_Modifier_Combining_Mark_ (<abbr>MCM</abbr>) subclasses specified in the
character tables.

The algorithm for reordering a sequence of marks is:

  - First, move any <samp>"Shadda"</samp> (combining class `33`) characters to the
    beginning of the mark sequence.
	
  -	Second, move any subsequence of combining-class-`230` characters that begins
       with a `230_MCM` character to the beginning of the sequence,
       before all <samp>"Shadda"</samp> characters. The subsequence must be moved
       as a group.

  - Finally, move any subsequence of combining-class-`220` characters that begins
       with a `220_MCM` character to the beginning of the sequence,
       before all <samp>"Shadda"</samp> characters and before all class-`230`
       characters. The subsequence must be moved as a group.

> Note: Unicode describes this mark-reordering operation, the Arabic
> Mark Transient Reordering Algorithm (<abbr>AMTRA</abbr>), in Technical Report 53,
> which describes it in terms that are distinct from standard,
> <abbr>Ccc</abbr>-based mark reordering.
>
> Specifically, <abbr title="Arabic Mark Transient Reordering Algorithm">AMTRA</abbr> is designated as an operation performed during
> text rendering only, which therefore does not impact other
> Unicode-compliance issues such as allowable input sequences or text
> encoding.
>
> However, shaping engines may choose to perform the reordering of
> modifier combining marks in conjunction with their Unicode
> normalization functionality for increased efficiency.

### Stage 2: Compound character composition and decomposition ###

The `ccmp` feature allows a font to substitute

  - mark-and-base sequences with a pre-composed glyph including both
    the mark and the base (as is done with a ligature substitution)
	
  - individual compound glyphs with the equivalent sequence of
    decomposed glyphs (such as decomposing a letter with ijam into a
    separate fundamental-letter glyph followed by an ijam-only glyph,
    to permit more precise positioning)
 
If present, these composition and decomposition substitutions must be
performed before applying any other <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups, because
those lookups may be written to match only the `ccmp`-substituted
glyphs. 


### Stage 3: Computing letter joining states ###

In order to correctly apply the initial, medial, and final form
substitutions from <abbr title="Glyph Substitution table">GSUB</abbr> during stage 5, the shaping engine must
tag every letter for possible application of the appropriate feature.

> Note: not all of the rules detailed below apply to every script that
> is supported by the general Arabic shaping model.

To determine which feature is appropriate, the shaping engine must
examine each word in turn and compute each letter's joining state from
the letter's `JOINING_TYPE` and the `JOINING_TYPE` of the
preceding character (if any).

> Note: Although the supported scripts use inter-word spaces, the
> `init` feature does _not_ refer to word-initial letters only and the
> `fina` feature does _not_ refer to word-final letters only.
>
> Rather, both of these terms are defined with respect to whether or
> not the preceding and subsequent letters form joins with the current
> letter. The letters at word boundaries will, naturally, take on
> initial and final forms, but initial and final forms of letters also
> occur regularly within words, when the letter in question is
> adjacent to a letter that does not form joins.

This computation starts from the first letter of the word, temporarily
tagging the letter for `isol` substitution. If the first
letter is the only letter in the word, the `isol` tag will remain unchanged.

From here, the algorithm consumes each character in the string, one at
a time, keeping track of the JOINING_TYPE of the previous character. 

If the current character is JOINING_TYPE_TRANSPARENT, move on to the next
character but preserve the currently-tracked JOINING_TYPE at its previous state.

If the preceding character's JOINING_TYPE is LEFT, DUAL, or
JOIN_CAUSING:
  - In `<syrc>` text, if the current character is <samp>"Alaph"</samp>, tag the
    current character for `med2`, then update the tag for the
    preceding character:
	  - `isol` becomes `init`
	  - `fina` becomes `medi`
	  - `init` remains `init`
	  - `medi` remains `medi`
  - If the current character's JOINING_TYPE is RIGHT, DUAL, or
    JOIN_CAUSING, tag the current character for `fina`, then update
    the tag for the preceding character:
	  - `isol` becomes `init`
	  - `fina` becomes `medi`
	  - `init` remains `init`
	  - `medi` remains `medi`

Otherwise, tag the current character for `isol`.

After testing the final character of the word, if the text is in `<syrc>` and
if the last character that is not JOINING_TYPE_TRANSPARENT or
JOINING_TYPE_NON_JOINING is <samp>"Alaph"</samp>, perform an additional test:
  - If the preceding character is JOINING_TYPE_LEFT, tag the current character
    for `fina`
  - If the preceding character's JOINING_GROUP is DALATH_RISH, tag the current
    character for `fin3`
  - Otherwise, tag the current character for `fin2`


Once the last character of the word has been processed, proceed to the
next word and repeat the algorithm, starting at the beginning of the
next word.

> Note: Because the processing of the characters in the algorithm
> described above is deterministic, shaping engines may choose to
> implement the joining-state computation as a state machine, in a lookup
> table, or by any other means desirable.

At the end of this process, all letters should be tagged for possible
substitution by one of the `isol`, `init`, `medi`, `med2`, `fina`, `fin2`, or
`fin3` features.

### Stage 4: Applying the `stch` feature ###

The `stch` feature decomposes and stretches special marks that are
meant to extend to the full width of words to which they are
attached. It was defined for use in `<syrc>` text runs for the <samp>"Syriac
Abbreviation Mark"</samp> (`U+070F`) but it can be used with similar marks in
other scripts.

To apply the `stch` feature, the shaping engine should first decompose the
`U+070F` glyph into components, which results in a beginning point glyph,
midpoint glyph, and endpoint glyph plus one (or more) extension glyphs: at
least one extension between the beginning and midpoint glyphs and at
least one extension between the midpoint and endpoint glyphs. 

The shaping engine must then calculate the total length of the word to
which the mark applies. That length, minus the advance widths of the
beginning, middle, and endpoint glyphs of the mark, must be divided by
two. 

The result, divided by the advance width of the extension glyph
and rounded up to the next integer, tells the shaping engine how many
copies of the extension glyph must be placed between the midpoint and
each end of the mark.

Following this procedure ensures that the same number of extensions is
used on each side of the mark so that it remains symmetrical.

Finally, the decomposed mark must be reordered as follows: 

  - All of the glyphs in the sequence for the mark, _except_ for
    the final glyph, are repositioned as a group so that they precede
    the word to which the mark is attached.
  - The final glyph in the mark sequence is repositioned to the end of
    the word.
	

### Stage 5: Applying the language-form substitution features from <abbr>GSUB</abbr> ###

The language-substitution phase applies mandatory substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for
this stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

The order in which these substitutions must be performed is fixed for
all scripts implemented with the Arabic shaping model:

	locl
	isol
	fina
	fin2 (only used in Syriac)
	fin3 (only used in Syriac)
	medi
	med2 (only used in Syriac)
	init
	rlig
	rclt
	calt

> Note: `rlig` and `calt` need to be applied to the word as a whole before
> continuing to the next feature.
	
See the individual script pages for further detail on each feature and
for script-specific information.


> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.


### Stage 6: Applying the typographic-form substitution features from <abbr>GSUB</abbr> ###

The typographic-substitution phase applies optional substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table.

The order in which these substitutions must be performed is fixed for
all scripts implemented in the Arabic shaping model:

    liga
	dlig
	cswh
	mset
	
See the individual script pages for further detail on each feature and
for script-specific information.


### Stage 7: Applying the positioning features from <abbr>GPOS</abbr> ###

The positioning stage adjusts the positions of mark and base
glyphs.

The order in which these features are applied is fixed for
all scripts implemented in the Arabic shaping model:

    curs
	kern
	mark
	mkmk


See the individual script pages for further detail on each feature and
for script-specific information.


================================================
FILE: opentype-shaping-arabic.md
================================================
# Arabic script shaping in OpenType #

This document details the general shaping procedure shared by all
Arabic script styles, and defines the common pieces that style-specific
implementations share. 


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Joining properties](#joining-properties)
	  - [Mark classification](#mark-classification)
	  - [Character tables](#character-tables)
  - [The `<arab>` shaping model](#the-arab-shaping-model)
      - [Stage 1: Transient reordering of modifier combining marks](#stage-1-transient-reordering-of-modifier-combining-marks)
      - [Stage 2: Compound character composition and decomposition](#stage-2-compound-character-composition-and-decomposition)
      - [Stage 3: Computing letter joining states](#stage-3-computing-letter-joining-states)
      - [Stage 4: Applying the `stch` feature](#stage-4-applying-the-stch-feature)
      - [Stage 5: Applying the language-form substitution features from <abbr>GSUB</abbr>](#stage-5-applying-the-language-form-substitution-features-from-gsub)
      - [Stage 6: Applying the typographic-form substitution features from <abbr>GSUB</abbr>](#stage-6-applying-the-typographic-form-substitution-features-from-gsub)
      - [Stage 7: Applying the positioning features from <abbr>GPOS</abbr>](#stage-7-applying-the-positioning-features-from-gpos)
  

## General information ##

The Arabic script is used to write multiple languages, most commonly
Arabic, Persian, Urdu, Pashto, Kurdish, and Azerbaijani. 

The Arabic script encompasses multiple distinct styles, including Naskh, 
Nastaliq, and Kufi, that share a number of common features and rules,
but that differ considerably in their final appearance. Due to the
common features found between the styles, a shaping engine can support
all styles of Arabic with a single shaping model.

In addition, several other writing systems that observe similar rules
and conventions can be supported using the same shaping model, even if
they are not historically related to Arabic. These scripts include:

  - [N'Ko](opentype-shaping-nko.md)
  - [Syriac](opentype-shaping-syriac.md)
  - [Mongolian](opentype-shaping-mongolian.md)

Note that each of these scripts has its own independent
script tag defined in OpenType. N'Ko uses `<nko >`, Syriac uses `<syrc>`, and
Mongolian uses `<mong>`. The information found below about the `<arab>`
script shaping model can serve as a general guide; script-specific
information can be found in the linked document for each script. 

Arabic is a joining script that uses inter-word spaces, so each
codepoint in a text run may be substituted with one of several
contextual forms corresponding to what, if any, characters appear
before and after the codepoint. Most, but not all, letter sequences
join; shaping engines must track which positions trigger joining
behavior for each letter. 

Arabic is written (and, therefore, rendered) from right to
left. Shaping engines must track the directionality of the text run
when scripts of different direction are mixed.

## Terminology ##

OpenType shaping uses a standard set of terms for elements of the
Arabic script. The terms used colloquially in any particular language
may vary, however, potentially causing confusion.

**Base** glyph or character is the standard term for an Arabic
character that is capable of taking a diacritical mark. 

Most of the base characters in Arabic are consonants, but each
language written with the Arabic script may have one or more vowel
base letters.

Vowels that are not base characters are frequently omitted from the
text run entirely. Alternatively, such a vowel may appear as a
diacritical mark called a **ḥarakah**.

**Ijam** is the standard term for an above- or below-base dot that
distinguishes one consonant from another. Ijam are not considered
diacritics; they are integral to the consonant of which they are a part.

**Shadda** and **tashdid** are both standard terms for the "consonant
doubling" diacritical mark.

**Hamza** is the standard term for the glottal stop
semi-consonant. The hamza is not regarded as a full letter in most
languages, although it can appear as a standalone letter within
words. In some sequences, the hamza attaches to an adjacent letter;
when a hamza-supporting letter is not adjacent, however, the hamza can
appear on its own.

**Kashida** (or **tatweel**) is the term for a glyph inserted into a
sequence for the purpose of elongating the baseline stroke of a
letter. Unicode documents use the term "tatweel" most frequently,
while OpenType documents use the term "kashida" most
frequently. Kashidas are typically inserted in order to justify lines
of text. 


## Glyph classification ##

Because Arabic is a joining (or cursive) script, proper shaping of
text runs involves identifying the joining behavior of each character,
then combining that information with any preceding or subsequent
characters to determine the contextually correct form for display.

### Joining properties ###

Arabic characters are assigned a `JOINING_TYPE` property in the
Unicode standard that indicates how they join to adjacent
characters. There are six possible values: 

  - `JOINING_TYPE_LEFT` indicates that a character joins with
    the subsequent character, but does not join with the preceding
    character. 
	
  - `JOINING_TYPE_RIGHT` indicates that a character joins with the
    preceding character, but does not join with the subsequent character.	

  - `JOINING_TYPE_DUAL` indicates that a character joins with the
    preceding character and joins with the subsequent character.
	
  - `JOINING_TYPE_NON_JOINING` indicates that a character does not
    join with the preceding or with the subsequent character.
	
  - `JOINING_TYPE_TRANSPARENT` indicates that the character does not
    join with adjacent characters _and_ that the character must be
    skipped over when the shaping engine is evaluating the joining
    positions in a sequence of characters. When a
    `JOINING_TYPE_TRANSPARENT` character is encountered in a sequence,
    the `JOINING_TYPE` of the preceding character passes
    through. Diacritical marks are frequently assigned this value. 
	
  - `JOINING_TYPE_JOIN_CAUSING` indicates that the character forces
    the use of joining forms with the preceding and subsequent
    characters. Kashidas and the Zero Width Joiner (`U+200D`) are both
    `JOIN_CAUSING` characters.
  

Arabic letters are also assigned to a `JOINING_GROUP` that indicates
which fundamental character they behave like with regard to joining
behavior. Each of the basic letters in the Arabic block tends to
belong to its own `JOINING_GROUP`, while letters from the supplemental and
extended blocks are usually assigned to the `JOINING_GROUP` that
corresponds to the character's base letter, with no diacritics or ijam.

For example, the Persian letter "Peh" (`U+067E`) is visually
represented as the Arabic letter "Beh" (`U+0628`), but with two additional
below-base ijam. Consequently, "Peh" is assigned to the `BEH` `JOINING_GROUP`.

### Mark classification ###

The Unicode standard defines a _canonical combining class_ for each
codepoint that is used whenever a sequence needs to be sorted into
canonical order. 

Several of the Arabic marks belong to standard combining
classes:

:::{table} Mark-classification table

| Codepoint | Combining class | Glyph                              |
|:----------|:----------------|:-----------------------------------|
|`U+064B`   | 27              | &#x064B; Fathatan / Open fathatan  |
|`U+064C`   | 28              | &#x064C; Dammatan / Open dammatan  |
|`U+064D`   | 29              | &#x064D; Kasratan / Open Kasratan  |
|`U+064E`   | 30              | &#x064E; Fatha / Small fatha       |
|`U+064F`   | 31              | &#x064F; Damma / Small damma       |
|`U+0650`   | 32              | &#x0650; Kasra / Small kasra       |
|`U+0651`   | 33              | &#x0651; Shadda                    |
|`U+0652`   | 34              | &#x0652; Sukun                     |
|`U+0670`   | 35              | &#x0670; Superscript Alef          |
|           | 220             | Other below-base combining marks   |
|           | 230             | Other above-base combining marks   |
:::


The numeric values of these combining classes are used during Unicode
normalization.

A subset of the Arabic marks require special handling when shaping
Arabic text, during the mark-reordering stage. These include two sets
of _Modifier Combining Marks_ (<abbr>MCM</abbr>) that may need to be repositioned
closer to the base character, when they occur in sequences of multiple
marks. 

The sets are:
  - Below-base (class 220) <abbr title="Modifier Combining Mark">MCM</abbr>s: "Hamza below" (`U+0655`), "Small low seen"
    (`U+06E3`), "Large round dot below" (`U+08CF`), "Small low waw" (`U+08D3`)
  - Above-base (class 230) <abbr title="Modifier Combining Mark">MCM</abbr>s: "Hamza above" (`U+0654`), "Mark noon ghunna"
    (`U+0658`), "Small high seen" (`U+06DC`), "Small high yeh" (`U+06E7`), "Small high
    noon" (`U+06E8`), "Small high Farsi yeh" (`U+08CA`), "Small high
    yeh barree with two dots below" (`U+08CB`), "Small high zah"
    (`U+08CD`), "Large round dot above" (`U+08CE`), "Small high waw" (`U+08F3`)

These classifications are used in the [mark-transient-reordering
stage](#stage-1-transient-reordering-of-modifier-combining-marks).

	
### Character tables ###

Separate character tables are provided for the Arabic, Arabic
Supplement, Arabic Extended-A, Arabic Extended-B, Arabic Extended-C,
and Rumi Numeral Symbols blocks, as well as for other miscellaneous
characters that are used in `<arab>` text runs:

  - [Arabic character table](character-tables/character-tables-arabic.md#arabic-character-table)
  - [Arabic Supplement character table](character-tables/character-tables-arabic.md#arabic-supplement-character-table)
  - [Arabic Extended-A character table](character-tables/character-tables-arabic.md#arabic-extended-a-character-table)
  - [Arabic Extended-B character table](character-tables/character-tables-arabic.md#arabic-extended-b-character-table)
  - [Arabic Extended-C character table](character-tables/character-tables-arabic.md#arabic-extended-c-character-table)
  - [Rumi Numeral Symbols character table](character-tables/character-tables-arabic.md#rumi-numeral-symbols-character-table)
  - [Miscellaneous character table](character-tables/character-tables-arabic.md#miscellaneous-character-table)

<!--- Commenting out Arabic Mathematical Alphabetical Symbols block 
      since it does not involve text shaping AFAICT. --->
<!---   - [Arabic Mathematical Alphabetic Symbols character table](character-tables/character-tables-arabic.md#arabic-mathematical-alphabetic-symbols-character-table) --->

Unicode also defines two blocks that implement backward compatibility
with retired file-encoding formats:

  - Arabic Presentation Forms-A
  - Arabic Presentation Forms-B
  
Unless a software application is required to support specific stores of
documents that are known to have used these older encodings, however, the
shaping engine should not be expected to handle any text runs
incorporating codepoints from these blocks.

The tables list each codepoint along with its Unicode general
category and its joining type. For letters, the table lists the
codepoint's joining group. For diacritical marks, the table lists the
codepoint's mark combining class. The codepoint's Unicode name and an example
glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Joining type | Joining group | Mark class | Glyph                        |
|:----------|:-----------------|:-------------|:--------------|:-----------|:-----------------------------|
|`U+0628`   | Letter           | DUAL         | BEH           | _null_     | &#x0628; Beh                 |
| | | | | |
|`U+0655`   | Mark [Mn]        | TRANSPARENT  | _null_        | 220_MCM   | &#x0655; Hamza Below         |
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of Arabic text include the dotted-circle placeholder (`U+25CC`), the
combining grapheme joiner (`U+034F`), the zero-width joiner (`U+200D`)
and zero-width non-joiner (`U+200C`), the left-to-right mark
(`U+200E`) and right-to-left mark (`U+200F`), and the no-break
space (`U+00A0`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
combining mark in isolation. Real-world text documents may also use
other characters, such as hyphens or dashes, in a similar placeholder
fashion; shaping engines should cope with this situation gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (which is not common in Arabic but can occur in
    other scripts)


The combining grapheme joiner (<abbr>CGJ</abbr>) is primarily used to alter the
order in which adjacent marks are positioned during the
mark-reordering stage, in order to adhere to the needs of a
non-default language orthography.

By default, OpenType shaping reorders sequences of adjacent marks by
sorting the sequence on the marks' Canonical_Combining_Class (<abbr>Ccc</abbr>)
values. The presence of a <abbr title="Combining Grapheme Joiner">CGJ</abbr> character within a sequence of marks has
the effect of splitting the sequence into two sequences of marks and,
therefore, halting any mark-reordering that would have occurred
between the marks on either side of the <abbr title="Combining Grapheme Joiner">CGJ</abbr>.

The zero-width joiner (<abbr title="Zero-Width Joiner">ZWJ</abbr>) is primarily used to force the usage of the
cursive connecting form of a letter even when the context of the
adjoining letters would not trigger the connecting form. 

For example, to show the initial form of a letter in isolation (such
as for displaying it in a table of forms), the sequence <samp>"_Letter_,ZWJ"</samp>
would be used. To show the medial form of a letter in isolation, the
sequence <samp>"ZWJ,_Letter_,ZWJ"</samp> would be used.

The zero-width non-joiner (<abbr>ZWNJ</abbr>) is primarily used to prevent a
cursive connection between two adjacent characters that would, under
normal circumstances, form a join. 

The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.


The right-to-left mark (<abbr>RLM</abbr>) and left-to-right mark (<abbr>LRM</abbr>) are used by
the Unicode bidirectionality algorithm (BiDi) to indicate the points
in a text run at which the writing direction changes. Generally
speaking, <abbr title="Right-to-Left Mark">RLM</abbr> and <abbr title="Left-to-Right Mark">LRM</abbr> codepoints do not interact with shaping.

The no-break space is primarily used to display those codepoints that
are defined as non-spacing (such as vowel or diacritical marks and <samp>"Hamza"</samp>) in an
isolated context, as an alternative to displaying them superimposed on
the dotted-circle placeholder.


## The `<arab>` shaping model ##

Processing a run of `<arab>` text involves seven top-level stages:

1. Transient reordering of modifier combining marks
2. Compound character composition and decomposition
3. Computing letter joining states
4. Applying the `stch` feature
5. Applying the language-form substitution features from <abbr>GSUB</abbr>
6. Applying the typographic-form substitution features from <abbr>GSUB</abbr>
7. Applying the positioning features from <abbr>GPOS</abbr>


### Stage 1: Transient reordering of modifier combining marks ###

<!--- http://www.unicode.org/reports/tr53/tr53-1.pdf --->

Sequences of adjacent marks must be reordered so that they appear in
the appropriate visual order before the mark-to-base and mark-to-mark
positioning features from <abbr title="Glyph Positioning table">GPOS</abbr> can be correctly applied.

In particular, those marks that have strong affinity to the base
character must be placed closest to the base.

This mark-reordering operation is distinct from the standard,
cross-script mark-reordering performed during Unicode
normalization. The standard Unicode mark-reordering algorithm is based
on comparing the _Canonical_Combining_Class_ (<abbr>Ccc</abbr>) properties of mark
codepoints, whereas this script-specific reordering utilizes the
_Modifier_Combining_Mark_ (<abbr>MCM</abbr>) subclasses specified in the
character tables.

The algorithm for reordering a sequence of marks is:

  - First, move any <samp>"Shadda"</samp> (combining class `33`) characters to the
    beginning of the mark sequence.
	
  -	Second, move any subsequence of combining-class-`230` characters that begins
       with a `230_MCM` character to the beginning of the sequence,
       before all <samp>"Shadda"</samp> characters. The subsequence must be moved
       as a group.

  - Finally, move any subsequence of combining-class-`220` characters that begins
       with a `220_MCM` character to the beginning of the sequence,
       before all <samp>"Shadda"</samp> characters and before all class-`230`
       characters. The subsequence must be moved as a group.

> Note: Unicode describes this mark-reordering operation, the Arabic
> Mark Transient Reordering Algorithm (<abbr>AMTRA</abbr>), in Unicode
> Technical Report #53, which describes it in terms that are distinct
> from standard, <abbr>Ccc</abbr>-based mark reordering.
>
> Specifically, <abbr title="Arabic Mark Transient Reordering Algorithm">AMTRA</abbr> is designated as an operation performed during
> text rendering only, which therefore does not impact other
> Unicode-compliance issues such as allowable input sequences or text
> encoding.
>
> However, shaping engines may choose to perform the reordering of
> modifier combining marks in conjunction with their Unicode
> normalization functionality for increased efficiency.

### Stage 2: Compound character composition and decomposition ###

The `ccmp` feature allows a font to substitute

  - mark-and-base sequences with a pre-composed glyph including both
    the mark and the base (as is done with a ligature substitution)
	
  - individual compound glyphs with the equivalent sequence of
    decomposed glyphs (such as decomposing a letter with ijam into a
    separate fundamental-letter glyph followed by an ijam-only glyph,
    to permit more precise positioning)
 

If present, these composition and decomposition substitutions must be
performed before applying any other <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups, because
those lookups may be written to match only the `ccmp`-substituted
glyphs. 


:::{figure-md}
![Composition and decomposition](/images/arabic/arabic-ccmp.svg "Composition and decomposition"){.shaping-demo .inline-svg .greyscale-svg #arabic-ccmp}

Composition and decomposition
:::

```{svg-color-toggle-button} arabic-ccmp
```


### Stage 3: Computing letter joining states ###

In order to correctly apply the initial, medial, and final form
substitutions from <abbr title="Glyph Substitution table">GSUB</abbr> during stage 5, the shaping engine must
tag every letter for possible application of the appropriate feature.

> Note: The following algorithm includes rules for processing `<syrc>`
> text in addition to `<arab>` text. Implementers concerned only with
> shaping `<arab>` text can omit the portions for `<syrc>`-specific
> rules. 

To determine which feature is appropriate, the shaping engine must
examine each word in turn and compute each letter's joining state from
the letter's `JOINING_TYPE` and the `JOINING_TYPE` of the
preceding character (if any).

> Note: Although Arabic uses inter-word spaces, the `init` feature
> does _not_ refer to word-initial letters only and the `fina` feature
> does _not_ refer to word-final letters only.
>
> Rather, both of these terms are defined with respect to whether or
> not the preceding and subsequent letters form joins with the current
> letter. The letters at word boundaries will, naturally, take on
> initial and final forms, but initial and final forms of letters also
> occur regularly within words, when the letter in question is
> adjacent to a letter that does not form joins.

This computation starts from the first letter of the word, temporarily
tagging the letter for `isol` substitution. If the first
letter is the only letter in the word, the `isol` tag will remain unchanged.

From here, the algorithm consumes each character in the string, one at
a time, keeping track of the JOINING_TYPE of the previous character. 

If the current character is JOINING_TYPE_TRANSPARENT, move on to the next
character but preserve the currently-tracked JOINING_TYPE at its previous state.

If the preceding character's JOINING_TYPE is LEFT, DUAL, or
JOIN_CAUSING:
  - In `<syrc>` text, if the current character is <samp>"Alaph"</samp>, tag the
    current character for `med2`, then update the tag for the
    preceding character:
	  - `isol` becomes `init`
	  - `fina` becomes `medi`
	  - `init` remains `init`
	  - `medi` remains `medi`
  - If the current character's JOINING_TYPE is RIGHT, DUAL, or
    JOIN_CAUSING, tag the current character for `fina`, then update
    the tag for the preceding character:
	  - `isol` becomes `init`
	  - `fina` becomes `medi`
	  - `init` remains `init`
	  - `medi` remains `medi`

Otherwise, tag the current character for `isol`.

After testing the final character of the word, if the text is in `<syrc>` and
if the last character that is not JOINING_TYPE_TRANSPARENT or
JOINING_TYPE_NON_JOINING is <samp>"Alaph"</samp>, perform an additional test:
  - If the preceding character is JOINING_TYPE_LEFT, tag the current character
    for `fina`
  - If the preceding character's JOINING_GROUP is DALATH_RISH, tag the current
    character for `fin3`
  - Otherwise, tag the current character for `fin2`


Once the last character of the word has been processed, proceed to the
next word and repeat the algorithm, starting at the beginning of the
next word.

> Note: Because the processing of the characters in the algorithm
> described above is deterministic, shaping engines may choose to
> implement the joining-state computation as a state machine, in a lookup
> table, or by any other means desirable.

At the end of this process, all letters should be tagged for possible
substitution by one of the `isol`, `init`, `medi`, `med2`, `fina`, `fin2`, or
`fin3` features.

### Stage 4: Applying the `stch` feature ###

The `stch` feature decomposes and stretches special marks that are
meant to extend to the full width of words to which they are
attached. It was defined for use in `<syrc>` text runs for the <samp>"Syriac
Abbreviation Mark"</samp> (`U+070F`) but it can be used with similar marks in
other scripts.

To apply the `stch` feature, the shaping engine should first decompose the
`U+070F` glyph into components, which results in a beginning-point
glyph, a midpoint glyph, and an endpoint glyph, plus two (or more)
extension glyphs: at least one extension between the beginning and
midpoint glyphs and at least one extension between the midpoint and
endpoint glyphs. 

The shaping engine must then calculate the total length of the word to
which the mark applies. That length, minus the advance widths of the
beginning, middle, and endpoint glyphs of the mark, must be divided by
two. 

The result, divided by the advance width of the extension glyph
and rounded up to the next integer, tells the shaping engine how many
copies of the extension glyph must be placed between the midpoint and
each end of the mark.

Following this procedure ensures that the same number of extensions is
used on each side of the mark so that it remains symmetrical.

Finally, the decomposed mark must be reordered as follows: 

  - All of the glyphs in the sequence for the mark, _except_ for
    the final glyph, are repositioned as a group so that they precede
    the word to which the mark is attached.
  - The final glyph in the mark sequence is repositioned to the end of
    the word.
	

### Stage 5: Applying the language-form substitution features from <abbr>GSUB</abbr> ###

The language-substitution phase applies mandatory substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for
this stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

The order in which these substitutions must be performed is fixed for
all scripts implemented in the Arabic shaping model:

	locl
	isol
	fina
	fin2 (not used in <arab>)
	fin3 (not used in <arab>)
	medi
	med2 (not used in <arab>)
	init
	rlig
	rclt
	calt
	
> Note: `rlig` and `calt` need to be applied to the word as a whole before
> continuing to the next feature.

#### Stage 5, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

:::{figure-md}
![Localized form substitution](/images/arabic/arabic-locl.svg "Localized form substitution"){.shaping-demo .inline-svg .greyscale-svg #arabic-locl}

Localized form substitution
:::

```{svg-color-toggle-button} arabic-locl
```


#### Stage 5, step 2: isol ####

The `isol` feature substitutes the default glyph for a codepoint with
the isolated form of the letter.

> Note: It is common for a font to use the isolated form of a letter
> as the default, in which case the `isol` feature would apply no
> substitutions. However, this is only a convention, and the active
> font may use other forms as the default glyphs for any or all
> codepoints.

:::{figure-md}
![Isolated form substitution](/images/arabic/arabic-isol.svg "Isolated form substitution"){.shaping-demo .inline-svg .greyscale-svg #arabic-isol}

Isolated form substitution
:::

```{svg-color-toggle-button} arabic-isol
```


#### Stage 5, step 3: fina ####

The `fina` feature substitutes the default glyph for a codepoint with
the terminal (or final) form of the letter.

:::{figure-md}
![Final form substitution](/images/arabic/arabic-fina.svg "Final form substitution"){.shaping-demo .inline-svg .greyscale-svg #arabic-fina}

Final form substitution
:::

```{svg-color-toggle-button} arabic-fina
```


#### Stage 5, step 4: fin2 ####

This feature is not used in `<arab>` text.

#### Stage 5, step 5: fin3 ####

This feature is not used in `<arab>` text.

#### Stage 5, step 6: medi ####

The `medi` feature substitutes the default glyph for a codepoint with
the medial form of the letter.

:::{figure-md}
![Medial form substitution](/images/arabic/arabic-medi.svg "Medial form substitution"){.shaping-demo .inline-svg .greyscale-svg #arabic-medi}

Medial form substitution
:::

```{svg-color-toggle-button} arabic-medi
```


#### Stage 5, step 7: med2 ####

This feature is not used in `<arab>` text.

#### Stage 5, step 8: init ####

The `init` feature substitutes the default glyph for a codepoint with
the initial form of the letter.

:::{figure-md}
![Initial form substitution](/images/arabic/arabic-init.svg "Initial form substitution"){.shaping-demo .inline-svg .greyscale-svg #arabic-init}

Initial form substitution
:::

```{svg-color-toggle-button} arabic-init
```


#### Stage 5, step 9: rlig ####

The `rlig` feature substitutes glyph sequences with mandatory
ligatures. Substitutions made by `rlig` cannot be disabled by
application-level user interfaces.

:::{figure-md}
![Required ligature substitution](/images/arabic/arabic-rlig.svg "Required ligature substitution"){.shaping-demo .inline-svg .greyscale-svg #arabic-rlig}

Required ligature substitution
:::

```{svg-color-toggle-button} arabic-rlig
```


#### Stage 5, step 10: rclt ####

The `rclt` feature substitutes glyphs with contextual alternate
forms. In general, this involves replacing the default form of a
connecting glyph with an alternate that provides a preferable
connection to an adjacent glyph.

The `rclt` feature should be used to perform such substitutions that
are required by the orthography of the active script and
language. Substitutions made by `rclt` cannot be disabled by 
application-level user interfaces.

#### Stage 5, step 11: calt ####

The `calt` feature substitutes glyphs with contextual alternate
forms. In general, this involves replacing the default form of a
connecting glyph with an alternate that provides a preferable
connection to an adjacent glyph.

The `calt` feature, in contrast to `rclt` above, performs
substitutions that are not mandatory for orthographic
correctness. Consequently, unlike `rclt`, the substitutions made by
`calt` can be disabled by application-level user interfaces.

:::{figure-md}
![Contextual alternate substitution](/images/arabic/arabic-calt.svg "Contextual alternate substitution"){.shaping-demo .inline-svg .greyscale-svg #arabic-calt}

Contextual alternate substitution
:::

```{svg-color-toggle-button} arabic-calt
```


### Stage 6: Applying the typographic-form substitution features from <abbr>GSUB</abbr> ###

The typographic-substitution phase applies optional substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table.

The order in which these substitutions must be performed is fixed for
all scripts implemented in the Arabic shaping model:

    liga
	dlig
	cswh
	mset
	

#### Stage 6, step 1: liga ####

The `liga` feature substitutes standard, optional ligatures that are on
by default. Substitutions made by `liga` may be disabled by
application-level user interfaces.

:::{figure-md}
![Standard ligature substitution](/images/arabic/arabic-liga.svg "Standard ligature substitution"){.shaping-demo .inline-svg .greyscale-svg #arabic-liga}

Standard ligature substitution
:::

```{svg-color-toggle-button} arabic-liga
```


#### Stage 6, step 2: dlig ####

The `dlig` feature substitutes additional optional ligatures that are
off by default. Substitutions made by `dlig` may be enabled by
application-level user interfaces.

:::{figure-md}
![Discretionary ligature substitution](/images/arabic/arabic-dlig.svg "Discretionary ligature substitution"){.shaping-demo .inline-svg .greyscale-svg #arabic-dlig}

Discretionary ligature substitution
:::

```{svg-color-toggle-button} arabic-dlig
```


#### Stage 6, step 3: cswh ####

The `cswh` feature substitutes contextual swash variants of
glyphs. For example, the active font might substitute a longer variant
of <samp>"Noon"</samp> when a certain number of subsequent glyphs do not descend
below the baseline.


#### Stage 6, step 4: mset ####

The `mset` feature performs mark positioning by substituting sequences
of bases and marks with precomposed base-and-mark glyphs.

> Note: Positioning marks with the `mark` and `mkmk` features of <abbr title="Glyph Positioning table">GPOS</abbr> is
> preferred, because `mset` can interfere with the OpenType shaping
> process. For example, substitution rules contained in `mset` may not be able to
> account for necessary mark-reordering adjustments conducted in the
> next stage.
> 
> Nevertheless, when the active font uses `mset` substitutions, the
> shaping engine must deal with the situation gracefully.

### Stage 7: Applying the positioning features from <abbr>GPOS</abbr> ###

The positioning stage adjusts the positions of mark and base
glyphs.

The order in which these features are applied is fixed for
all scripts implemented in the Arabic shaping model:

    curs
	dist
	kern
	mark
	mkmk

#### Stage 7, step 1: curs ####

The `curs` feature performs cursive positioning. Each glyph has an
entry point and exit point; the `curs` feature positions glyphs so
that the entry point of the current glyph meets the exit point of the
preceding glyph.

:::{figure-md}
![Cursive positioning](/images/arabic/arabic-curs.svg "Cursive positioning"){.shaping-demo .inline-svg .greyscale-svg #arabic-curs}

Cursive positioning
:::

```{svg-color-toggle-button} arabic-curs
```


#### Stage 7, step 2: dist ####

The `dist` feature adjusts the spacing between glyphs. Unlike `kern`,
adjustments made with `dist` do not require the application or the user
to enable any software kerning features, if such features are
optional. 

:::{figure-md}
![Distance adjustment](/images/arabic/arabic-dist.svg "Distance adjustment"){.shaping-demo .inline-svg .greyscale-svg #arabic-dist}

Distance adjustment
:::

```{svg-color-toggle-button} arabic-dist
```


#### Stage 7, step 3: kern ####

The `kern` feature adjusts glyph spacing between pairs of adjacent glyphs.

:::{figure-md}
![Kerning adjustment](/images/arabic/arabic-kern.svg "Kerning adjustment"){.shaping-demo .inline-svg .greyscale-svg #arabic-kern}

Kerning adjustment
:::

```{svg-color-toggle-button} arabic-kern
```


#### Stage 7, step 4: mark ####

The `mark` feature positions marks with respect to base glyphs.

:::{figure-md}
![Mark positioning](/images/arabic/arabic-mark.svg "Mark positioning"){.shaping-demo .inline-svg .greyscale-svg #arabic-mark}

Mark positioning
:::

```{svg-color-toggle-button} arabic-mark
```


#### Stage 7, step 5: mkmk ####

The `mkmk` feature positions marks with respect to preceding marks,
providing proper positioning for sequences of marks that attach to the
same base glyph.

:::{figure-md}
![Mark-to-mark positioning](/images/arabic/arabic-mkmk.svg "Mark-to-mark positioning"){.shaping-demo .inline-svg .greyscale-svg #arabic-mkmk}

Mark-to-mark positioning
:::

```{svg-color-toggle-button} arabic-mkmk
```


================================================
FILE: opentype-shaping-bengali.md
================================================
```{include} /_global.md
```

# Bengali shaping in OpenType #

This document details the shaping procedure needed to display text
runs in the Bengali script.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes and subclasses](#shaping-classes-and-subclasses)
      - [Bengali character tables](#bengali-character-tables)
  - [The `<bng2>` shaping model](#the-bng2-shaping-model)
      - [Stage 1: Identifying syllables and other sequences](#stage-1-identifying-syllables-and-other-sequences)
      - [Stage 2: Initial reordering](#stage-2-initial-reordering)
      - [Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr>](#stage-3-applying-the-basic-substitution-features-from-gsub)
      - [Stage 4: Final reordering](#stage-4-final-reordering)
      - [Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr>](#stage-5-applying-all-remaining-substitution-features-from-gsub)
      - [Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr>](#stage-6-applying-remaining-positioning-features-from-gpos)
  - [The `<beng>` shaping model](#the-beng-shaping-model)
      - [Distinctions from `<bng2>`](#distinctions-from-bng2)
      - [Advice for handling fonts with `<beng>` features only](#advice-for-handling-fonts-with-beng-features-only)
      - [Advice for handling text runs composed in `<beng>` format](#advice-for-handling-text-runs-composed-in-beng-format)


## General information ##

The Bengali or Bangla script belongs to the Indic family, and follows
the same general patterns as the other Indic scripts. More
specifically, it belongs to the North Indic subgroup, in which
sequences of adjacent consonants are often represented as conjuncts.

The Bengali script is used to write multiple languages, most commonly
Bengali, Assamese, and Manipuri. In addition, Sanskrit may be written
in Bengali, so Bengali script runs may include glyphs from the Vedic
Extensions block of Unicode. 

There are two extant Bengali script tags defined in OpenType, `<beng>`
and `<bng2>`. The older script tag, `<beng>`, was deprecated in 2005.
Therefore, new fonts should be engineered to work with the `<bng2>`
shaping model. However, if a font is encountered that supports only
`<beng>`, the shaping engine should deal with it gracefully.

## Terminology ##

OpenType shaping uses a standard set of terms for Indic scripts.  The
terms used colloquially in any particular language may vary, however,
potentially causing confusion.

**Matra** is the standard term for a dependent vowel sign. In the Bengali
language, dependent-vowel signs may also be referred to as _kar_ forms — e.g., "i-kar" or
"u-kar".

The term "matra" is also used to refer to the headline above most
Bengali letters. To avoid ambiguity, the term **headline** is
used in most Unicode and OpenType shaping documents.

**Halant** and **Virama** are both standard terms for the below-base "vowel-killer"
mark. Unicode documents use the term "virama" most frequently, while
OpenType documents use the term "halant" most frequently. In the Bengali
language, this sign is known as the _hasanta_.

**Chandrabindu** (or simply **Bindu**) is the standard term for the diacritical mark
indicating that the preceding vowel should be nasalized. In the Bengali
language, this mark is known as the _candrabindu_.

The term **base consonant** is also critical to Indic shaping. The
base consonant of a syllable is the consonant that carries the
syllable's vowel sound, either the inherent vowel (for an unmarked
base consonant) or a dependent vowel (with the addition of a matra).

A syllable's base consonant is generally rendered in its full form
(although it may form ligatures), while other consonants in the
syllable frequently take on secondary forms. Different <abbr title="Glyph Substitution table">GSUB</abbr>
substitutions may apply to a script's **pre-base** and **post-base**
consonants. Some of these substitutions create **above-base** or
**below-base** forms. The **Reph** form of the consonant "Ra" is an
example.

Syllables may also begin with an **independent vowel** instead of a
consonant. In these syllables, the independent vowel is rendered in
full-letter form, not as a matra, and the independent vowel serves as the
syllable base, similar to a base consonant.

Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.

## Glyph classification ##

Shaping Bengali text depends on the shaping engine correctly
classifying each glyph in the run. As with most other scripts, the
classifications must distinguish between consonants, vowels
(independent and dependent), numerals, punctuation, and various types
of diacritical mark. 

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not sufficient to fully capture the
expected shaping behavior (such as glyph reordering). Therefore,
Bengali glyphs must additionally be classified by how they are treated
when shaping a run of text.

### Shaping classes and subclasses ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Indic scripts. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Several of the diacritic and syllable-modifying marks behave according
to their own rules and, thus, have a special class. These include
`BINDU`, `VISARGA`, `AVAGRAHA`, `NUKTA`, and `VIRAMA`. Some
less-common marks behave according to rules that are similar to these
common marks, and are therefore classified with the corresponding
common mark. The Vedic Extensions also include a `CANTILLATION`
class for tone marks.

Letters generally fall into the classes `CONSONANT`,
`VOWEL_INDEPENDENT`, and `VOWEL_DEPENDENT`. These classes help the
shaping engine parse and identify key positions in a syllable. For
example, Unicode categorizes dependent vowels as `Mark [Mn]`, but the
shaping engine must be able to distinguish between dependent vowels
and diacritical marks (which are also categorized as `Mark [Mn]`).

Bengali uses one subclass of consonant, `CONSONANT_DEAD`. This
subclass is used only for the Bengali "Khanda Ta" (`U+09CE`). It indicates that
<samp>"Khanda Ta"</samp> should match tests for consonants, such as when [identifying
syllables](#stage-1-identifying-syllables-and-other-sequences), but that, unlike
standard consonants, it carries no inherent vowel. The lack of an
inherent vowel is important during the [initial
reordering](#stage-2-initial-reordering) stage.

Other characters, such as symbols and miscellaneous letters (for
example, letter-like symbols that only occur as standalone entities
and do not occur within syllables), need no special attention from the
shaping engine, so they are not assigned a shaping class.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType features that
might affect how the respective glyphs are drawn, such as `tnum`,
which specifies the usage of tabular-width numerals, and `sups`, which
replaces the default glyphs with superscript variants.

Marks and dependent vowels are further labeled with a mark-placement
subclass, which indicates where the glyph will be placed with respect
to the syllable base to which it is attached. The actual position of
the glyphs is determined by the lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr>
table; however, the shaping rules for Indic scripts require that the
shaping engine be able to identify marks by their general
position. 

For example, left-side dependent vowels (matras), classified
with `LEFT_POSITION`, must frequently be reordered, with the final
position determined by whether or not other letters in the syllable
have formed ligatures or combined into conjunct forms. Therefore, the
`LEFT_POSITION` subclass of the character must be tracked throughout
the shaping process.

There are four basic _mark-placement subclasses_ for dependent vowels
(matras). Each corresponds to the visual position of the matra with
respect to the base consonant or syllable base to which it is attached:

  - `LEFT_POSITION` matras are positioned to the left of the syllable base.
  - `RIGHT_POSITION` matras are positioned to the right of the syllable base.
  - `TOP_POSITION` matras are positioned above the syllable base.
  - `BOTTOM_POSITION` matras are positioned below the syllable base.
  
These positions may also be referred to elsewhere in shaping documents as:

  - _Pre-base_ matras
  - _Post-base_ matras
  - _Above-base_ matras
  - _Below-base_ matras
  
respectively. The `LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

In addition, dependent-vowel codepoints that are composed of multiple
components will be designated in character tables as having a compound
_mark-placement subclass_, such as `TOP_AND_RIGHT` or `LEFT_AND_RIGHT`. 

However, these multi-part matras are decomposed into separate matra
components during the shaping process. After the decomposition, each
matra component will belong to exactly one of the four basic
_mark-placement subclasses_.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there are some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 

### Bengali character tables ###

Separate character tables are provided for the Bengali and Vedic
Extensions blocks as well as for other miscellaneous characters that
are used in `<bng2>` text runs:

  - [Bengali character table](character-tables/character-tables-bengali.md#bengali-character-table)
  - [Vedic Extensions character table](character-tables/character-tables-bengali.md#vedic-extensions-character-table)
  - [Miscellaneous character table](character-tables/character-tables-bengali.md#miscellaneous-character-table)

The tables list each codepoint along with its Unicode general
category, its shaping class, and its mark-placement subclass. The
codepoint's Unicode name and an example glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0981`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0981; Candrabindu         |
| | | | |
|`U+0995`   | Letter           | CONSONANT         | _null_                     | &#x0995; Ka                  |
:::

Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine.

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the tables use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of Bengali text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (such as <samp>"Reph"</samp>)


The zero-width joiner (<abbr title="Zero-Width Joiner">ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a <samp>"_Consonant_,Halant,_Consonant_"</samp> sequence. 

  - The sequence <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> blocks the
    formation of a conjunct between the two consonants. 

Note, however, that the <samp>"_Consonant_,Halant"</samp> subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead.

  - The sequence <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> should produce
    the first consonant in its standard form, followed by an explicit
    <samp>"Halant"</samp>.

A secondary usage of the zero-width joiner is to prevent the formation of
<samp>"Reph"</samp>. 

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence should not produce a <samp>"Reph"</samp>,
    even where an initial <samp>"Ra,Halant"</samp> sequence without the zero-width
    joiner would otherwise produce a <samp>"Reph"</samp>.

The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.

For example:

  - A lookup that substitutes an alternate version of a
    dependent-vowel (matra) glyph when it is preceded by <samp>"Ka,Halant,Tta"</samp>
    should still be applied if the dependent-vowel codepoint is preceded
    by <samp>"Ka,Halant,ZWJ,Tta"</samp> in the text run.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match <samp>"NBSP,ZWJ,Halant,_Consonant_"</samp>, <samp>"NBSP,_mark_"</samp>, or <samp>"NBSP,_matra_"</samp>.

In addition to general punctuation, runs of Bengali text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block.


## The `<bng2>` shaping model ##

Processing a run of `<bng2>` text involves six top-level stages:

1. Identifying syllables and other sequences
2. Initial reordering
3. Applying the basic substitution features from <abbr>GSUB</abbr>
4. Final reordering
5. Applying all remaining substitution features from <abbr>GSUB</abbr>
6. Applying all remaining positioning features from <abbr>GPOS</abbr>


As with other Indic scripts, the initial reordering stage and the
final reordering stage each involve applying a set of several
script-specific rules. The basic substitution features must be applied
to the run in a specific order. The remaining substitution features in
stage five, however, do not have a mandatory order.

Indic scripts follow many of the same shaping patterns, but they
differ in a few critical characteristics that the shaping engine must
track. These include:

  - The position of the base consonant in a syllable.
  
  - The final position of <samp>"Reph"</samp>.
  
  - Whether <samp>"Reph"</samp> must be requested explicitly or if it is formed by
    a specific, implicit sequence.
	
  - Whether the below-base forms feature is applied only to consonants
    before the syllable base, only to consonants after the base
    consonant, or to both.
	
  - The ordering positions for dependent vowels
    (matras). Specifically, right-side, above-base, and below-base
    matras follow different rules in different scripts. 
	All Indic scripts position left-side matras in the same
    manner, in the ordering position `POS_PREBASE_MATRA`. 

With regard to these common variations, Bengali's specific shaping
characteristics include:

  - `BASE_POS_LAST` = The base consonant of a syllable is the last
     consonant, not counting any special final-consonant forms.

  - `REPH_POS_AFTER_SUBJOINED` = <samp>"Reph"</samp> is ordered after all subjoined (i.e.,
     below-base) consonant forms.

  - `REPH_MODE_IMPLICIT` = <samp>"Reph"</samp> is formed by an initial <samp>"Ra,Halant"</samp> sequence.

  - `BLWF_MODE_PRE_AND_POST` = The below-base forms feature is applied both to
     pre-base consonants and to post-base consonants.

  - `MATRA_POS_TOP` = _null_  = Unlike most other Indic scripts, Bengali
     does not use any above-base matras. Therefore, this shaping
     characteristic does not apply.

  - `MATRA_POS_RIGHT` = `POS_AFTER_POST` = Right-side matras are
     ordered after all post-base consonant forms.

  - `MATRA_POS_BOTTOM` = `POS_AFTER_SUBJOINED` = Below-base matras are
     ordered after all subjoined (i.e., below-base) consonant forms.

These characteristics determine how the shaping engine must reorder
certain glyphs, how base consonants are determined, and how <samp>"Reph"</samp>
should be encoded within a run of text.

> Note: Unlike most other Indic scripts, Bengali does not use
> above-base matras. Therefore `MATRA_POS_TOP` can be set to _null_.

### Stage 1: Identifying syllables and other sequences ###

A syllable in Bengali consists of a valid orthographic sequence
that may be followed by a "tail" of modifier signs. 

> Note: The Bengali Unicode block enumerates five modifier signs,
> "Candrabindu" (`U+0981`), "Anusvara" (`U+0982`), "Visarga" 
> (`U+0983`), "Avagraha" (`U+09BD`), and "Vedic Anusvara"
> (`U+09FC`). In addition, Sanskrit text written in Bengali may
> include additional signs from the Vedic Extensions block.

Each syllable contains exactly one vowel sound. Valid syllables may
begin with either a consonant or an independent vowel. 

If the syllable begins with a consonant, then the consonant that
provides the vowel sound is referred to as the "base" consonant. If
the syllable begins with an independent vowel, that vowel is the
syllable's only vowel sound and serves as the "base". 

> Note: A consonant that is not accompanied by a dependent vowel (matra) sign
> carries the script's inherent vowel sound. This vowel sound is changed
> by a dependent vowel (matra) sign following the consonant.

From the shaping engine's perspective, the main distinction between a
syllable with a base consonant and a syllable with an
independent-vowel base is that a syllable with an independent-vowel
base is less likely to include additional consonants in special forms
and less likely to include dependent vowel signs
(matras). Therefore, in the common case, vowel-based syllables may
involve less reordering, substitution feature applications, and other
processing than consonant-based syllables.

In some languages and orthographies, vowel-based syllables are
not permitted to include additional consonants or matras, and certain
<abbr title="Glyph Substitution table">GSUB</abbr> substitution features do not occur. However, there are often
known exceptions, and real-world text makes no such guarantees. 

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

Generally speaking, the base consonant is the final consonant of the
syllable and its vowel sound designates the end of the syllable. This
rule is synonymous with the `BASE_POS_LAST` characteristic mentioned
earlier. 

Non-base consonants in a valid syllable will be separated by <samp>"Halant"</samp>
marks. Pre-base consonants will be followed by <samp>"Halant"</samp>, while
post-base consonants will be preceded by <samp>"Halant"</samp>.

	Pre-baseC Halant BaseC Halant Post-baseC
	
The algorithm for correctly identifying the base consonant includes a
test to recognize these sequences and not mis-identify the base
consonant.

All consonants in Bengali can potentially occur in pre-base
position. The <samp>"Halant"</samp> marks on pre-base consonants indicate that they
carry no vowel. Instead, they affect syllable pronunciation by
combining with the base consonant (e.g., "_thr_" or "_spl_").

Three consonants in Bengali are allowed to occur after the base
consonant or syllable base: "Ya", "Ba", and "Ra". When these consonants occur after the
base consonant or syllable base, they take on special forms.

A <samp>"Ya"</samp> after the base consonant or syllable base takes on the <samp>"Yaphala"</samp> form.

> Note: some fonts may also implement the <samp>"Yaphala"</samp> form for a
> post-base "Yya" (`U+09DF`).

A <samp>"Ba"</samp> after the base consonant or syllable base takes on the below-base <samp>"Baphala"</samp>
form. A <samp>"Ba"</samp> before the base consonant or syllable base will take on the below-base
<samp>"Baphala"</samp> form unless it is the first pre-base consonant in the syllable.

As with other Indic scripts, the consonant <samp>"Ra"</samp> receives special
treatment; in many circumstances it is replaced by one of two combining
mark-like forms. 

  - A <samp>"Ra,Halant"</samp> sequence at the beginning of a syllable is replaced
    with an above-base mark called <samp>"Reph"</samp> (unless the <samp>"Ra"</samp> is the only
    consonant in the syllable). This rule is synonymous with the
    `REPH_MODE_IMPLICIT` characteristic mentioned earlier.

  - A non-initial <samp>"Ra"</samp> before the base consonant or syllable base or a <samp>"Ra"</samp> after the
    base consonant or syllable base takes on the below-base form <samp>"Raphala"</samp>.
  
<samp>"Reph"</samp> characters must be reordered after the
syllable-identification stage is complete. 

> Note: `<bng2>` text contains two Unicode codepoints for "Ra":
> `U+09B0` and `U+09F0`. 
>
> `U+09B0` is used in Bengali-language, Manipuri-language, and
> Sanskrit text. `U+09F0` is used in Assamese-language text.
>

> Note: Generally speaking, OpenType fonts will implement support for
> any below-base, post-base, and pre-base-reordering consonant forms
> by including the necessary substitution rules in their `blwf`,
> `pstf`, and `pref` lookups in <abbr title="Glyph Substitution table">GSUB</abbr>.
>
> Consequently, whenever shaping engines need to determine whether or 
> not a given consonant can take on such a special form, the most
> appropriate test is to check if the consonant is included in the
> relevant <abbr title="Glyph Substitution table">GSUB</abbr> lookup. Other implementations are possible, such as
> maintaining static tables of consonants, but checking for <abbr title="Glyph Substitution table">GSUB</abbr>
> support ensures that the expected behavior is implemented in the
> active font, and is therefore the most reliable approach.


In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.

> Note: Foreign loanwords, when written in the Bengali script, may
> not adhere to the syllable-formation rules described above. In
> particular, it is not uncommon to encounter foreign loanwords that
> contain a word-final suffix of consonants.
>
> Nevertheless, such word-final suffixes will be correctly matched by
> the regular expressions listed below. These loanwords are pronounced
> differently, which raises issues for potential readers, but the
> character sequences do not affect the shaping process.


Syllables should be identified by examining the run and matching
glyphs, based on their categorization, using regular expressions. 

The following general-purpose Indic-shaping regular expressions can be
used to match Bengali syllables. 

The regular expressions utilize the shaping classes from the tables
above. For the purpose of syllable identification, more general
classes can be used, as defined in the following table. This
simplifies the resulting expressions. 

```markdown
_ra_		= The consonant "Ra" 
_consonant_	= ( `CONSONANT` | `CONSONANT_DEAD` ) - _ra_
_vowel_		= `VOWEL_INDEPENDENT`
_nukta_	  	= `NUKTA`
_halant_	= `VIRAMA`
_zwj_		= `JOINER`
_zwnj_		= `NON_JOINER`
_matra_		= `VOWEL_DEPENDENT` | `PURE_KILLER`
_syllablemodifier_	= `SYLLABLE_MODIFIER` | `BINDU` | `VISARGA` | `GEMINATION_MARK`
_vedicsign_	= `CANTILLATION`
_placeholder_	= `PLACEHOLDER` | `CONSONANT_PLACEHOLDER` | `NUMBER`
_dottedcircle_	= `DOTTED_CIRCLE`
_repha_		= `CONSONANT_PRE_REPHA`
_consonantmedial_	= `CONSONANT_MEDIAL`
_symbol_	= `SYMBOL` | `AVAGRAHA`
_consonantwithstacker_	= `CONSONANT_WITH_STACKER`
_other_		= `OTHER` | `MODIFYING_LETTER`
```


> Note: the _ra_ identification class is mutually exclusive with 
> the _consonant_ class. The union of the _consonant_ and _ra_ classes
> is used in the regular expression elements below in order to
> correctly identify "Ra" characters that do not trigger <samp>"Reph"</samp> or
> <samp>"Rakaar"</samp> shaping behavior.
>
> Note, also, that the cantillation mark "combining Ra" in the
> Devanagari Extended block does _not_ belong to the _ra_
> identification class, and that the other "combining consonant"
> cantillation marks in the Devanagari Extended block do not belong to
> the _consonant_ identification class.

> Note: The _placeholder_ identification class includes codepoints
> that are often used in place of vowels or consonants when a document
> needs to display a matra, mark, or special form in isolation or
> in another context beyond a standard syllable. Examples of
> _placeholder_ codepoints include hyphens and non-breaking
> spaces. Sequences that utilize this approach should be identified as
> "standalone" syllables.
>
> The _placeholder_ identification class also includes numerals, which
> are commonly used as word substitutes within normal text. Examples
> include ordinals (e.g., "4th").

> Note: The _other_ identification class includes codepoints that
> do not interact with adjacent characters for shaping purposes. Even
> though some of these codepoints (such as `MODIFYING_LETTER`) can
> occur within words, they evoke no behavior from the shaping
> engine and do not factor into the regular expressions that
> follow. Therefore, the shaping engine may choose to ignore them
> during syllable identification; they are listed here for completeness.

These identification classes form the bases of the following regular
expression elements:

```markdown
C	= _consonant_ | _ra_
Z	= _zwj_ | _zwnj_
REPH	= (_ra_ _halant_) | _repha_
CN		= C _zwj_? _nukta_?
FORCED_RAKAR	= _zwj_ _halant_ _zwj_ _ra_
S	= _symbol_ _nukta_?
MATRA_GROUP	= Z{0,3} _matra_ _nukta_? (_halant_ | FORCED_RAKAR)?
SYLLABLE_TAIL	= (Z? _syllablemodifier_ _syllablemodifier_? _zwnj_?)? _vedicsign_{0,3}
HALANT_GROUP	= Z? _halant_ (_zwj_ _nukta_?)?
FINAL_HALANT_GROUP	= HALANT_GROUP | (_halant_ _zwnj_)
MEDIAL_GROUP	= _consonantmedial_?
HALANT_OR_MATRA_GROUP	= (FINAL_HALANT_GROUP | MATRA_GROUP*)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(MATRA_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(MATRA_GROUP){0,4}` .


Using the above elements, the following regular expressions define the
possible syllable types:

A consonant-based syllable will match the expression:
```markdown
(_repha_|_consonantwithstacker_)? (CN HALANT_GROUP)* CN MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(CN HALANT_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(CN HALANT_GROUP){0,4}` .

A vowel-based syllable will match the expression:
```markdown
REPH? _vowel_ _nukta_? (_zwj_ | (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

A standalone syllable will match the expression:
```markdown
((_repha_|_consonantwithstacker_)? _placeholder_ | REPH? _dottedcircle_) _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

> Note: Although they are labeled as "standalone syllables" here,
> many sequences that match the standalone regular expression above
> are instances where a document needs to display a matra, combining
> mark, or special form in isolation. Such sequences might not have
> any significance with regard to the definition of syllables used in
> the language or orthography of the text.

A symbol-based syllable will match the expression:
```markdown
S SYLLABLE_TAIL
```

A broken syllable will match the expression:
```markdown
REPH? _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

The primary problem involved in shaping broken syllables is the lack
of a syllable base (either a base consonant or an independent
vowel). Without a syllable base, the shaping engine cannot perform
<abbr title="Glyph Positioning table">GPOS</abbr> positioning and other contextual operations that are required
later in the shaping process.

To make up for this limitation, shaping engines should insert a
dotted-circle placeholder (`U+25CC`) character into the text stream
where the missing syllable base was expected to occur. This
placeholder allows the shaping process to proceed on a best-effort
basis at handling the broken-syllable sequence, but making guarantees
about the orthographic correctness or preferred appearance of the
final result is out of scope for this document.

Shaping engines can perform this dotted-circle insertion at any point
after the broken syllable has been recognized and before <abbr title="Glyph Substitution table">GSUB</abbr> features
are applied. However, the best results will likely be attained by
performing the insertion immediately, before proceeding to
stage 2. This will enable the maximum number of <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features
in the active font to be correctly applied to the text run by ensuring
that all reordering, tagging, and sorting algorithms are executed as
usual.

> Note: In software stacks where other text-handling operations, such
> as Unicode normalization and localization, are performed before the
> text run is passed to the shaping engine, there is a potential for
> the dotted-circle insertion to cause unexpected effects.
>
> For example, if a `ccmp` or `locl` feature substitutes the default
> dotted-circle placeholder glyph with a variant glyph of a different
> size or weight for the (`U+25CC`) codepoint, then any shaping engine
> which relies on another software component to handle that
> functionality must take additional care to ensure consistency.


The expressions above use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


After the syllables have been identified, each of the subsequent 
shaping stages occurs on a per-syllable basis.

### Stage 2: Initial reordering ###

The initial reordering stage is used to relocate glyphs from the
phonetic order in which they occur in a run of text to the
orthographic order in which they are presented visually.

> Note: Primarily, this means moving dependent-vowel (matra) glyphs, 
> <samp>"Ra,Halant"</samp> glyph sequences, and other consonants that take special
> treatment in some circumstances. <samp>"Ba"</samp>, <samp>"Ta"</samp>, and <samp>"Ya"</samp> occasionally
> take on special forms, depending on their position in the syllable.
>
> These reordering moves are mandatory. The final-reordering stage
> may make additional moves, depending on the text and on the features
> implemented in the active font.

The syllable should be processed by tagging each glyph with its
intended position based on its ordering category. After all glyphs
have been tagged, the entire syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.

The final sort order of the ordering categories should be:


	POS_RA_TO_BECOME_REPH
	POS_PREBASE_MATRA
	POS_PREBASE_CONSONANT

	POS_SYLLABLE_BASE
	POS_AFTER_MAIN

	POS_ABOVEBASE_CONSONANT

	POS_BEFORE_SUBJOINED
	POS_BELOWBASE_CONSONANT
	POS_AFTER_SUBJOINED

	POS_BEFORE_POST
	POS_POSTBASE_CONSONANT
	POS_AFTER_POST

	POS_FINAL_CONSONANT
	POS_SMVD


This sort order enumerates all of the possible final positions to
which a codepoint might be reordered, across all of the Indic
scripts. It includes some ordering categories not utilized in
Bengali. 

The basic positions (left to right) are <samp>"Reph"</samp>
(`POS_RA_TO_BECOME_REPH`), dependent vowels (matras) and consonants
positioned before the base consonant or syllable base
(`POS_PREBASE_MATRA` and `POS_PREBASE_CONSONANT`), the base consonant
or syllable base (`POS_SYLLABLE_BASE`), above-base consonants
(`POS_ABOVEBASE_CONSONANT`), below-base consonants
(`POS_BELOWBASE_CONSONANT`), consonants positioned after the base
consonant or syllable base (`POS_POSTBASE_CONSONANT`), syllable-final
consonants (`POS_FINAL_CONSONANT`), and syllable-modifying or Vedic
signs (`POS_SMVD`).

In addition, several secondary positions are defined to handle various
reordering rules that deal with relative, rather than absolute,
positioning. `POS_AFTER_MAIN` means that a character must be
positioned immediately after the syllable base. `POS_BEFORE_SUBJOINED`
and `POS_AFTER_SUBJOINED` mean that a character must be positioned
before or after any below-base consonants, respectively. Similarly,
`POS_BEFORE_POST` and `POS_AFTER_POST` mean that a character must be
positioned before or after any post-base consonants, respectively. 

For shaping-engine implementers, the names used for the ordering
categories matter only in that they are unambiguous. 

For a definition of the "base" consonant, refer to step 2.1, which
follows.

#### Stage 2, step 1: Base consonant ####

The first step is to determine the base consonant of the syllable, if
there is one, and tag it as `POS_SYLLABLE_BASE`.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base, and it should be tagged
as `POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a standalone sequence or other syllable that begins with a placeholder
or dotted circle, the placeholder or dotted circle will always serve
as the syllable base, and it should be tagged as
`POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a syllable that begins with a consonant, the shaping engine must
determine the base consonant by a script-specific algorithm.

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

The base consonant is defined as the consonant in a consonant-based
syllable that carries the syllable's vowel sound. That vowel sound
will either be provided by the script's inherent vowel (in which case
it is not written with a separate character) or the sound will be designated
by the addition of a dependent-vowel (matra) sign.

<!--- >
> Because vowel-based syllables will not include consonants and
> because independent vowels do not take on special forms or require
> reordering, many of the steps that follow will involve no
> work for a vowel-based syllable. However, vowel-based syllables must
> still be sorted and their marks handled correctly, and <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr>
> lookups must be applied. These steps of the shaping process follow
> the same rules that are employed for consonant-based syllables.
--->

While performing the base-consonant search, shaping engines may
also encounter special-form consonants, including below-base
consonants and post-base consonants. Each of these special-form
consonants must also be tagged (`POS_BELOWBASE_CONSONANT`,
`POS_POSTBASE_CONSONANT`, respectively). 

Any pre-base-reordering consonant (such as a pre-base-reordering <samp>"Ra"</samp>)
encountered during the base-consonant search must be tagged
`POS_POSTBASE_CONSONANT`. 
 
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.


The algorithm for determining the base consonant is:

  - If the syllable starts with <samp>"Ra,Halant"</samp> and the syllable contains
    more than one consonant, exclude the starting <samp>"Ra"</samp> from the list of
    consonants to be considered. 
  - Starting from the end of the syllable, move backwards until a consonant is found.
      * If the consonant is the first consonant, stop.
      * If the consonant is preceded by the sequence <samp>"Halant,ZWJ"</samp>, stop.
      * If the consonant has a below-base form, tag it as
        `POS_BELOWBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant has a post-base form, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant is a pre-base-reordering <samp>"Ra"</samp>, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If none of the above conditions is true, stop.
  - The consonant stopped at will be the base consonant.

> Note: The algorithm is designed to work for all Indic
> scripts. However, Bengali does not utilize pre-base-reordering <samp>"Ra"</samp>.

Bengali includes one post-base consonant. 

  - The sequence <samp>"Halant,Ya"</samp> (`U+09CD`,`U+09AF`)  triggers
    the <samp>"Yaphala"</samp> form. <samp>"Yaphala"</samp> behaves like a modifier to the
    pronunciation of the preceding vowel, despite the fact that it is
    formed from a consonant.

:::{figure-md}
![Yaphala composition](/images/bengali/bengali-yaphala.svg "Yaphala composition"){.shaping-demo .inline-svg .greyscale-svg #bengali-yaphala}

Yaphala composition
:::

```{svg-color-toggle-button} bengali-yaphala
```


> Note: Some fonts may also implement the <samp>"Yaphala"</samp> post-base form for
> <samp>"Halant,Yya"</samp> (`U+09CD`,`U+09DF`).

Bengali includes two below-base consonant forms:

  - <samp>"Halant,Ra"</samp> (after the syllable base) and <samp>"Ra,Halant"</samp> (in a
    non-syllable-initial position) take on the <samp>"Raphala"</samp> form.
  - <samp>"Ba,Halant"</samp> (before the syllable base) and <samp>"Halant,Ba"</samp> (after the
    syllable base) take on the <samp>"Baphala"</samp> form.
	

:::{figure-md}
![Raphala composition](/images/bengali/bengali-raphala.svg "Raphala composition"){.shaping-demo .inline-svg .greyscale-svg #bengali-raphala}

Raphala composition
:::

```{svg-color-toggle-button} bengali-raphala
```


:::{figure-md}
![Baphala composition](/images/bengali/bengali-baphala.svg "Baphala composition"){.shaping-demo .inline-svg .greyscale-svg #bengali-baphala}

Baphala composition
:::

```{svg-color-toggle-button} bengali-baphala
```


> Note: Because Bengali employs the `BLWF_MODE_PRE_AND_POST` shaping
> characteristic, consonants with below-base special forms may occur
> before or after the syllable base.
> 
> During the base-consonant search, only the <samp>"Halant,_consonant_"</samp> 
> pattern following the syllable base for these below-base forms will
> be encountered. Step 2.5 below ensures that the <samp>"_consonant_,Halant"</samp>
> pattern preceding the base consonant or syllable base for these below-base forms will
> also be tagged correctly.


#### Stage 2, step 2: Matra decomposition ####

Second, any two-part dependent vowels (matras) must be decomposed
into their left-side and right-side components. Bengali has two
two-part dependent vowels, "O" (`U+09CB`) and "Au" (`U+09CC`). Each
has a canonical decomposition, so this step is unambiguous. 

> "O" (`U+09CB`) decomposes to "`U+09C7`,`U+09BE`"
>
> "Au" (`U+09CC`) decomposes to "`U+09C7`,`U+09D7`"

Because this decomposition is a character-level operation, the shaping
engine may choose to perform it earlier, such as during an initial
Unicode-normalization stage. However, all such decompositions must be
completed before the shaping engine begins step three, below.

:::{figure-md}
![Two-part matra decomposition](/images/bengali/bengali-matra-decompose.svg "Two-part matra decomposition"){.shaping-demo .inline-svg .greyscale-svg #bengali-matra-decompose}

Two-part matra decomposition
:::

```{svg-color-toggle-button} bengali-matra-decompose
```

#### Stage 2, step 3: Tag matras ####

Third, all left-side dependent-vowel (matra) signs, including those that
resulted from the preceding decomposition step, must be tagged to be
moved to the beginning of the syllable, with `POS_PREBASE_MATRA`.

All right-side dependent-vowel (matra) signs are tagged
`POS_AFTER_POST`.

All below-base dependent-vowel (matra) signs are tagged
`POS_AFTER_SUBJOINED`.

For simplicity, shaping engines may choose to tag single-part matras
in an earlier text-processing step, using the information in the
_Mark-placement subclass_ column of the character tables. It is
critical at this step, however, that all decomposed matras are also
correctly tagged before proceeding to the next step.

#### Stage 2, step 4: Adjacent marks ####

Fourth, any subsequences of marks that include a <samp>"Nukta"</samp> and a
<samp>"Halant"</samp> or Vedic sign must be reordered so that the <samp>"Nukta"</samp> appears
first.

This means that the subsequence <samp>"Halant,Nukta"</samp> is reordered to
<samp>"Nukta,Halant"</samp> and that the subsequence <samp>"_Vedic_sign_,Nukta"</samp> is
reordered to <samp>"Nukta,_Vedic_sign_"</samp>.

For subsequences of affected marks that are longer than two, the
reordering operation must be repeated until the <samp>"Nukta"</samp> is the first
character in the subsequence. No other marks in the subsequence
should be reordered.

This order is canonical in Unicode and is required so that
<samp>"_consonant_,Nukta"</samp> substitution rules from <abbr title="Glyph Substitution table">GSUB</abbr> will be correctly
matched later in the shaping process.


> Note: Bengali includes the consonant "Yya" (`U+09DF`), which is
> canonically equivalent to the sequence <samp>"Ya,Nukta"</samp>
> (`U+09AF`,`U+09BC`). <samp>"Ya"</samp> can also take on the post-base <samp>"Yaphala"</samp>
> form when it occurs in the sequence <samp>"`SYLLABLE_BASE`,Halant,Ya"</samp>.
>
> Consequently, shaping engines that encounter a <samp>"Ya,Nukta"</samp>
> sequence may wish to recompose that sequence to <samp>"Yya"</samp> earlier than
> other nukta-variant substitutions, as a safeguard
> against the decomposed <samp>"Ya"</samp> unintentionally triggering a <samp>"Yaphala"</samp>
> substitution during <abbr title="Glyph Substitution table">GSUB</abbr> feature application (if the sequence in
> question happens to match the <samp>"Yaphala"</samp> substitution rule as well as
> the <samp>"Yya"</samp> substitution rule).
> 
> A well-behaved font should be expected to include explicit <samp>"Yya"</samp> and
> <samp>"Yaphala"</samp> substitution rules that do not trigger unexpected results,
> but there is no guarantee that real-world fonts will be well-behaved
> in this regard.


#### Stage 2, step 5: Pre-base consonants ####

Fifth, consonants that occur before the syllable base must be
tagged. For each such consonant, excluding those in initial <samp>"Ra,Halant"</samp> sequences that will become <samp>"Reph"</samp>s:

  - If the consonant has a below-base form, tag it as
          `POS_BELOWBASE_CONSONANT`. 
  - Otherwise, tag it as `POS_PREBASE_CONSONANT`.
  
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.

Bengali includes two below-base consonant forms:

  - <samp>"Halant,Ra"</samp> (after the syllable base) and <samp>"Ra,Halant"</samp> (in a
    non-syllable-initial position) take on the <samp>"Raphala"</samp> form.
  - <samp>"Ba,Halant"</samp> (before the syllable base) and <samp>"Halant,Ba"</samp> (after the
    syllable base) take on the <samp>"Baphala"</samp> form.
	

:::{figure-md}
![Raphala composition](/images/bengali/bengali-raphala-1.svg "Raphala composition"){.shaping-demo .inline-svg .greyscale-svg #bengali-raphala-1}

Raphala composition
:::

```{svg-color-toggle-button} bengali-raphala-1
```

:::{figure-md}
![Baphala composition](/images/bengali/bengali-baphala-1.svg "Baphala composition"){.shaping-demo .inline-svg .greyscale-svg #bengali-baphala-1}

Baphala composition
:::

```{svg-color-toggle-button} bengali-baphala-1
```


> Note: Because Bengali employs the `BLWF_MODE_PRE_AND_POST` shaping
> characteristic, consonants with below-base special forms may occur
> before or after the syllable base. 
> 
> During the base-consonant search in 2.1, any instances of the
> <samp>"Halant,_consonant_"</samp>  pattern following the syllable base for these
> below-base forms will be encountered. The tagging in this step
> ensures that the <samp>"_consonant_,Halant"</samp> pattern preceding the syllable
> base for these below-base forms will also be tagged correctly.


#### Stage 2, step 6: Reph ####

Sixth, initial <samp>"Ra,Halant"</samp> sequences that will become <samp>"Reph"</samp>s must be tagged with
`POS_RA_TO_BECOME_REPH`.

> Note: An initial <samp>"Ra,Halant"</samp> sequence will always become a <samp>"Reph"</samp>
> unless the <samp>"Ra"</samp> is the only consonant in the syllable.

#### Stage 2, step 7: Final consonants ####

Seventh, all final consonants must be tagged. Consonants that occur
after the syllable base _and_ after a dependent vowel (matra) sign
must be tagged with  `POS_FINAL_CONSONANT`.

> Note: Final consonants occur only in Sinhala and should not be
> expected in `<bng2>` text runs. This step is included here to
> maintain compatibility across Indic scripts.


<!---  Not sure about Yya.... --->
	
#### Stage 2, step 8: Mark tagging ####

Eighth, all marks must be tagged. 

> Note: In this step, joiner and non-joiner characters must also be
> tagged according to the same rules given for marks, even though
> these characters are not categorized as marks in Unicode.

Marks in the `BINDU`, `VISARGA`, `AVAGRAHA`, `CANTILLATION`,
`SYLLABLE_MODIFIER`, `GEMINATION_MARK`, and `SYMBOL` categories should
be tagged with `POS_SMVD`. 

All <samp>"Nukta"</samp>s must be tagged with the same positioning tag as the
preceding consonant, independent vowel, placeholder, or dotted circle.

All remaining marks (not in the `POS_SMVD` category and not <samp>"Nukta"</samp>s)
must be tagged with the same positioning tag as the closest non-mark
character the mark has affinity with, so that they move together 
during the sorting step.

There are two possible cases: those marks before the syllable base
and those marks after the syllable base. In addition, an exception is
made for <samp>"Halant"</samp> marks that follow a left-side (pre-base) matra.

  1. Initially, all remaining marks should be tagged with the same
	 positioning tag as the closest preceding consonant.

  2. For each consonant after the syllable base (such as post-base
	 consonants, below-base consonants, or final consonants), all
	 remaining marks located between that current consonant and any
	 previous consonant should be tagged with the same positioning tag as
	 the current (later) consonant.
  
     In other words, all consonants preceding the syllable base "own" the
	 marks that follow them, while all consonants after the syllable base
	 "own" the marks that come before them. When a syllable does not have
	 any consonants after the syllable base, the syllable base should
	 "own" all the marks that follow it.
  
  3. Finally, <samp>"Halant"</samp> marks that follow a left-side dependent vowel
     (matra) should _not_ be tagged with the left-side matra's
     positioning tag. Instead, the <samp>"Halant"</samp> should be tagged with the
     positioning tag of the non-mark character preceding the left-side
     matra. This prevents the <samp>"Halant"</samp> mark from being moved with the
     left-side matra when the syllable is sorted.


#### Stage 2, step 9: Sort syllable ####

With these steps completed, the syllable can be sorted into the final
sort order as listed at the beginning of stage 2.

The glyphs in the syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.


#### Stage 2, step 10: Flag sequences for possible feature applications ####

With the initial reordering complete, those glyphs in the syllable that
may have <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features applied in stages 3, 5, and 6 should be
flagged for each potential feature. 

This flagging is preliminary; the set of potential features varies
between different scripts and which features are supported varies
between fonts. It is also possible that the application of
one feature on a glyph sequence will perform a substitution that makes
a later feature no longer applicable to the updated sequence.

Consequently, the flagging must be completed before shaping proceeds
to the stages during which features are applied.

Some shaping features, such as `locl`, can potentially apply to any
glyphs. Therefore it is not necessary to maintain a separate flag for
these features in the bitmask (or other data structure) used to track
the flags -- although shaping engines may do so if desired.

The sequences to flag are summarized in the list below; a full
description of each feature's function and interpretation is provided
in the <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> application stages that follow.

  - `nukt` should match <samp>"_Consonant_,Nukta"</samp> sequences
  - `akhn` should match <samp>"Ka,Halant,Ssa"</samp> and <samp>"Ja,Halant,Nya"</samp>
  - `rphf` should match initial <samp>"Ra,Halant"</samp> sequences but _not_ match
            initial <samp>"Ra,Halant,ZWJ"</samp> sequences
  - `blwf` should match <samp>"Halant,Ra"</samp> and <samp>"Halant,Ba"</samp> in
            post-base positions and <samp>"Ra,Halant"</samp> and
            <samp>"Ba,Halant"</samp> in non-initial pre-base positions
  - `half` should match <samp>"_Consonant_,Halant"</samp> in pre-base position but
           _not_ match <samp>"Ra,Halant"</samp> sequences flagged for `rphf` and
           _not_ match <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequences
  - `pstf` should match <samp>"Halant,Ya"</samp> in post-base position
  - `vatu` should match <samp>"_Consonant_,Halant,Ra"</samp>
  - `cjct` should match <samp>"_Consonant_,Halant,_Consonant_"</samp> but _not_
            match <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
            <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp>


### Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should already have been flagged for possible
application of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2, step 10.

The order in which these substitutions must be performed is fixed for
all Indic scripts:

	locl
	nukt
	akhn
	rphf 
	rkrf (not used in Bengali)
	pref (not used in Bengali)
	blwf 
	abvf (not used in Bengali)
	half
	pstf
	vatu
	cjct
	cfar (not used in Bengali)

#### Stage 3, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

#### Stage 3, step 2: nukt ####

The `nukt` feature replaces <samp>"_Consonant_,Nukta"</samp> sequences with a
precomposed nukta-variant of the consonant glyph. 

The context defined for a `nukt` feature is:

:::{table} `nukt` feature context

| Backtrack     | Matching sequence             | Lookahead     |
|:--------------|:------------------------------|:--------------|
| _none_        | `_consonant_`(full),`_nukta_` | _none_        |

:::

:::{figure-md}
![Nukta composition](/images/bengali/bengali-nukt.svg "Nukta composition"){.shaping-demo .inline-svg .greyscale-svg #bengali-nukt}

Nukta composition
:::

```{svg-color-toggle-button} bengali-nukt
```

> Note: Bengali includes the consonant "Yya" (`U+09DF`), which is
> canonically equivalent to the sequence <samp>"Ya,Nukta"</samp>
> (`U+09AF`,`U+09BC`). <samp>"Ya"</samp> can also take on the post-base <samp>"Yaphala"</samp>
> form when it occurs in the sequence <samp>"`SYLLABLE_BASE`,Halant,Ya"</samp>.
>
> Consequently, shaping engines that encounter a <samp>"Ya,Nukta"</samp>
> sequence may wish to recompose that sequence to <samp>"Yya"</samp> earlier than
> other nukta-variant substitutions, as a safeguard
> against the decomposed <samp>"Ya"</samp> unintentionally triggering a <samp>"Yaphala"</samp>
> substitution during <abbr title="Glyph Substitution table">GSUB</abbr> feature application (if the sequence in
> question happens to match the <samp>"Yaphala"</samp> substitution rule as well as
> the <samp>"Yya"</samp> substitution rule).
> 
> A well-behaved font should be expected to include explicit <samp>"Yya"</samp> and
> <samp>"Yaphala"</samp> substitution rules that do not trigger unexpected results,
> but there is no guarantee that real-world fonts will be well-behaved
> in this regard.


#### Stage 3, step 3: akhn ####

The `akhn` feature replaces two specific sequences with required ligatures. 

  - <samp>"Ka,Halant,Ssa"</samp> is substituted with the <samp>"KSsa"</samp> ligature. 
  - <samp>"Ja,Halant,Nya"</samp> is substituted with the <samp>"JNya"</samp> ligature. 
  
These sequences can occur anywhere in a syllable. The <samp>"KSsa"</samp> and
<samp>"JNya"</samp> characters have orthographic status equivalent to full
consonants in some languages, and fonts may have `cjct` substitution
rules designed to match them in subsequences. Therefore, this
feature must be applied before all other many-to-one substitutions.

The context defined for an `akhn` feature is:

:::{table} `akhn` feature context

| Backtrack     | Matching sequence           | Lookahead     |
|:--------------|:----------------------------|:--------------|
| _none_        | `AKHAND_CONSONANT_SEQUENCE` | _none_        |
:::

:::{figure-md}
![KSsa ligation](/images/bengali/bengali-akhn-kssa.svg "KSsa ligation"){.shaping-demo .inline-svg .greyscale-svg #bengali-akhn-kssa}

KSsa ligation
:::

```{svg-color-toggle-button} bengali-akhn-kssa
```

:::{figure-md}
![JNya ligation](/images/bengali/bengali-akhn-jnya.svg "JNya ligation"){.shaping-demo .inline-svg .greyscale-svg #bengali-akhn-jnya}

JNya ligation
:::

```{svg-color-toggle-button} bengali-akhn-jnya
```

#### Stage 3, step 4: rphf ####

The `rphf` feature replaces initial <samp>"Ra,Halant"</samp> sequences with the
<samp>"Reph"</samp> glyph.

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence, however, must not be flagged for
    the `rphf` substitution.


The context defined for a `rphf` feature is:
    
:::{table} `rphf` feature context

| Backtrack        | Matching sequence       | Lookahead     |
|:-----------------|:------------------------|:--------------|
| `SYLLABLE_START` | "Ra"(full),`_halant_`   | _none_        |

:::


:::{figure-md}
![Reph composition](/images/bengali/bengali-rphf.svg "Reph composition with common Ra"){.shaping-demo .inline-svg .greyscale-svg #bengali-rphf}

Reph composition with common "Ra"
:::

```{svg-color-toggle-button} bengali-rphf
```

:::{figure-md}
![Reph composition](/images/bengali/bengali-rphf-as.svg "Reph composition with Assamese Ra"){.shaping-demo .inline-svg .greyscale-svg #bengali-rphf-as}

Reph composition with Assamese "Ra"
:::

```{svg-color-toggle-button} bengali-rphf-as
```

#### Stage 3, step 5: rkrf ####

> This feature is not used in Bengali.

#### Stage 3, step 6: pref ####

> This feature is not used in Bengali.

<!--- 3.5: The `pref` feature replaces pre-base-consonant glyphs with -->
<!--any special forms. --->

#### Stage 3, step 7: blwf ####

The `blwf` feature replaces below-base-consonant glyphs with any
special forms. Bengali includes two below-base consonant
forms:

  - <samp>"Halant,Ra"</samp> (after the syllable base) and <samp>"Ra,Halant"</samp> (in a
    non-syllable-initial position) take on the <samp>"Raphala"</samp> form.
  - <samp>"Ba,Halant"</samp> (before the syllable base) and <samp>"Halant,Ba"</samp> (after the
    syllable base) take on the <samp>"Baphala"</samp> form. 

Because Bengali incorporates the `BLWF_MODE_PRE_AND_POST` shaping
characteristic, any pre-base consonants and any post-base consonants
may potentially match a `blwf` substitution; therefore, both cases must
be flagged for comparison. Note that this is not necessarily the case in other
Indic scripts that use a different `BLWF_MODE_` shaping
characteristic. 


:::{figure-md}
![Raphala composition](/images/bengali/bengali-raphala-2.svg "Raphala composition"){.shaping-demo .inline-svg .greyscale-svg #bengali-raphala-2}

Raphala composition
:::

```{svg-color-toggle-button} bengali-raphala-2
```

:::{figure-md}
![Baphala composition](/images/bengali/bengali-baphala-2.svg "Baphala composition"){.shaping-demo .inline-svg .greyscale-svg #bengali-baphala-2}

Baphala composition
:::

```{svg-color-toggle-button} bengali-baphala-2
```

#### Stage 3, step 8: abvf ####

> This feature is not used in Bengali.

#### Stage 3, step 9: half ####

The `half` feature replaces <samp>"_Consonant_,Halant"</samp> sequences before the
base consonant or syllable base with "half forms" of the consonant
glyphs.

In the most common case, this substitution applies to
<samp>"_Consonant_,Halant"</samp> sequences that are followed by another
_Consonant_.

In addition, a sequence matching <samp>"_Consonant_,Halant,ZWJ"</samp> must also be
flagged for potential `half` substitutions.

> Note: The presence of the <samp>"ZWJ"</samp> at the end of the sequence means
> that the sequence may match the regular-expression test in stage 1
> as the end of a syllable, even without being followed by a base
> consonant or syllable base.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.

There are three exceptions to the default behavior, for which
the shaping engine must test:

  - Initial <samp>"Ra,Halant"</samp> sequences, which should have been flagged for
    the `rphf` feature earlier, must not be flagged for potential
    `half` substitutions.

  - Non-initial <samp>"Ra,Halant"</samp> and <samp>"Ba,Halant"</samp> sequences, which should
    have been flagged for the `rkrf` or `blwf` features earlier, must
    not be flagged for potential `half` substitutions.

  - A sequence matching <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be
    flagged for potential `half` substitutions.

:::{figure-md}
![Half-form formation](/images/bengali/bengali-half-ka.svg "Half-form formation"){.shaping-demo .inline-svg .greyscale-svg #bengali-half-ka}

Half-form formation
:::

```{svg-color-toggle-button} bengali-half-ka
```

#### Stage 3, step 10: pstf ####

The `pstf` feature replaces post-base-consonant glyphs with any special forms.


:::{figure-md}
![Yaphala composition](/images/bengali/bengali-yaphala-1.svg "Yaphala composition"){.shaping-demo .inline-svg .greyscale-svg #bengali-yaphala-1}

Yaphala composition
:::

```{svg-color-toggle-button} bengali-yaphala-1
```

#### Stage 3, step 11: vatu ####

The `vatu` feature replaces certain sequences with "Vattu variant"
forms. 

"Vattu variants" are formed from glyphs followed by <samp>"Raphala"</samp>
(the below-base form of "Ra"); therefore, this feature must be applied after
the `blwf` feature.

The context defined for a `vatu` feature is:
    
:::{table} `vatu` feature context

| Backtrack        | Matching sequence       | Lookahead     |
|:-----------------|:------------------------|:--------------|
| _none_           | `_consonant_`,"Raphala" | _none_        |

:::


:::{figure-md}
![Vattu variant ligation](/images/bengali/bengali-vatu.svg "Vattu variant ligation"){.shaping-demo .inline-svg .greyscale-svg #bengali-vatu}

Vattu variant ligation
:::

```{svg-color-toggle-button} bengali-vatu
```

#### Stage 3, step 12: cjct ####

The `cjct` feature replaces sequences of adjacent consonants with
conjunct ligatures. These sequences must match <samp>"_Consonant_,Halant,_Consonant_"</samp>.

A sequence matching <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
<samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be flagged to form a conjunct.

> Note: The presence of the <samp>"ZWJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> sequence should automatically
> inhibit any `cjct` feature rules from matching the sequence as valid
> input, and thus prevent the `cjct` substitution from being applied.

> Note: The presence of the <samp>"ZWNJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequence means that the
> <samp>"_Consonant_,Halant,ZWNJ"</samp> subsequence will match the
> regular-expression test in stage 1 as the end of a syllable.
> 
> Because OpenType shaping features in `<bng2>` are defined as
> applying only within an individual syllable, this means that the
> presence of the <samp>"ZWNJ"</samp> will automatically prevent the application of
> a `cjct` feature by triggering the identification of a syllable
> break between the two consonants.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWNJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.
>
> Note, also: The presence of the <samp>"ZWJ"</samp> means that a
> <samp>"_Consonant_,Halant,ZWJ"</samp> sequence may match the regular-expression
> test in stage 1 as the end of a syllable, even without being
> followed by a base consonant or syllable base. By definition,
> however, a <samp>"_Consonant_,Halant,ZWJ"</samp> syllable identified in stage 1
> cannot also include a <samp>"_Consonant_"</samp> after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.

The font's <abbr title="Glyph Substitution table">GSUB</abbr> rules might be implemented so that `cjct`
substitutions apply to half-form consonants; therefore, this feature
must be applied after the `half` feature. 


:::{figure-md}
![Conjunct ligation](/images/bengali/bengali-cjct.svg "Conjunct ligation"){.shaping-demo .inline-svg .greyscale-svg #bengali-cjct}

Conjunct ligation
:::

```{svg-color-toggle-button} bengali-cjct
```

#### Stage 3, step 13: cfar ####

> This feature is not used in Bengali.


### Stage 4: Final reordering ###

The final reordering stage repositions marks, dependent-vowel (matra)
signs, and <samp>"Reph"</samp> glyphs to the appropriate location with respect to
the base consonant or syllable base. Because multiple substitutions
may have occurred during the application of the basic-shaping features
in the preceding stage, these repositioning moves could not be
performed during the initial reordering stage.

Like the initial reordering stage, the steps involved in this stage
occur on a per-syllable basis.

<!--- Check that classifications have not been mangled. If the
character is a Halant AND a ligature was formed AND a multiple
substitution was performed, restore the classification to VIRAMA
because it was almost certainly lost in the preceding <abbr title="Glyph Substitution table">GSUB</abbr> stage.
--->

#### Stage 4, step 1: Base consonant ####

The final reordering stage, like the initial reordering stage, begins
with determining the syllable base of each syllable, following the
same algorithm used in stage 2, step 1.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base. In a standalone sequence or
other syllable that begins with a placeholder or a dotted circle, the
placeholder or dotted circle will always serve as the syllable base.

In a syllable that begins with a consonant, the shaping engine must
repeat the base-consonant search algorithm used in stage 2, step 1.

The codepoint of the underlying base consonant or syllable base will
not change between the search performed in stage 2, step 1, and the
search repeated here. However, the application of <abbr title="Glyph Substitution table">GSUB</abbr> shaping
features in stage 3 means that several ligation and many-to-one
substitutions may have taken place. The final glyph produced by that
process may, therefore, be a conjunct or ligature form — in most
cases, such a glyph will not have an assigned Unicode codepoint.
   
#### Stage 4, step 2: Pre-base matras ####

Pre-base dependent vowels (matras) that were reordered during the
initial reordering stage must be moved to their final position. This
position is defined as:
   
   - after the last standalone <samp>"Halant"</samp> glyph that comes after the
     matra's starting position and also comes before the main
     consonant.
   - If a zero-width joiner follows this last standalone <samp>"Halant"</samp>, the
     final matra position is moved to after the joiner.

This means that the matra will move to the right of all explicit
<samp>"consonant,Halant"</samp> subsequences, but will stop to the left of the base
consonant or syllable base, all conjuncts or ligatures that contain
the base consonant or syllable base, and all half forms.

:::{figure-md}
![Pre-base matra reordering](/images/bengali/bengali-matra-position.svg "Pre-base matra reordering"){.shaping-demo .inline-svg .greyscale-svg #bengali-matra-position}

Pre-base matra reordering
:::

```{svg-color-toggle-button} bengali-matra-position
```

> Note: OpenType and Unicode both state that if the syllable includes
> a <abbr title="Zero-Width Joiner">ZWJ</abbr> immediately after the last <samp>"Halant"</samp>, then the final matra
> position should be after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.
>
> However, there are several test sequences indicating that
> Microsoft's Uniscribe shaping engine did not follow this rule (in,
> at least, Devanagari and Bengali text), and in these circumstances
> Uniscribe instead makes the final matra position before the final
> <samp>"Consonant,Halant,ZWJ"</samp>.
>
> Subsequently, the HarfBuzz shaping engine has also followed the same
> pattern. If other shaping engine implementations prefer to maintain
> maximum compatibility with Uniscribe and HarfBuzz, then they should
> also follow suit.

> Note: The Microsoft script-development specifications for OpenType
> shaping also state that if a zero-width non-joiner follows the last
> standalone <samp>"Halant"</samp>, the final matra position is moved to after the
> non-joiner. However, it is unnecessary to test for this condition,
> because a <samp>"Halant,ZWNJ"</samp> subsequence is, by definition, the end of a
> syllable. Consequently, a <samp>"Halant,ZWNJ"</samp> cannot be followed by a
> pre-base dependent vowel.


#### Stage 4, step 3: Reph ####

<samp>"Reph"</samp> must be moved from the beginning of the syllable to its final
position. Because Bengali incorporates the `REPH_POS_AFTER_SUBJOINED`
shaping characteristic, this final position is defined to be
immediately after the syllable base and any subjoined (below-base
consonant or below-base dependent vowel) forms.

The algorithm for finding the final <samp>"Reph"</samp> position is:

<!---
  - If the syllable does not have a base consonant (such as a syllable
    based on an independent vowel or placeholder), then the final
    <samp>"Reph"</samp> position is immediately before the first character tagged
    with the `POS_BEFORE_POST` position or any later position in the
    sort order.

    -- If there are no characters tagged with `POS_BEFORE_POST` or
       later positions, then <samp>"Reph"</samp> is positioned at the end of the
       syllable.
--->

  - Starting at the first post-<samp>"Reph"</samp> consonant, search forward looking
    for the first explicit <samp>"Halant"</samp>, ending the search when the base
    consonant is encountered. If such an explicit <samp>"Halant"</samp> is found,
    move the <samp>"Reph"</samp> to the position immediately after this
    <samp>"Halant"</samp>.
	  * If a zero-width joiner (<abbr>ZWJ</abbr>) or a zero-width non-joiner (<abbr>ZWNJ</abbr>)
        follows this <samp>"Halant"</samp>, move the <samp>"Reph"</samp> to the position
        immediately after the <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>. This will be the final
        <samp>"Reph"</samp> position. 
	  * If no <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> follows this <samp>"Halant"</samp>, leave the <samp>"Reph"</samp> in
        its position immediately after the <samp>"Halant"</samp>. This will be the
        final <samp>"Reph"</samp> position. 
  - If no such explicit <samp>"Halant"</samp> is found in the previous step, find
    the first post-base consonant that has not formed a ligature with
    the base consonant. If such a non-ligated post-base consonant is
    found, move the <samp>"Reph"</samp> to the position immediately before the
    non-ligated post-base consonant. This will be the final <samp>"Reph"</samp>
    position.
  - If no such non-ligated post-base consonant is found in the
    previous step, move the <samp>"Reph"</samp> to the position immediately before
    the first post-base matra, syllable modifier, or Vedic sign that
    has a positioning tag after the script's <samp>"Reph"</samp> position in the
    syllable sort order (as listed in [stage
    2](#stage-2-initial-reordering)). This will be the final <samp>"Reph"</samp>
    position. 
	> Note: Because Bengali incorporates the
    > `REPH_POS_AFTER_SUBJOINED` shaping characteristic, this means
    > any positioning tag of `POS_BEFORE_POST` or later,
    > although a post-base matra, syllable modifier, or Vedic sign
    > would not typically be tagged with `POS_BEFORE_POST`.
  - If no other location has been located in the previous steps, move
    the <samp>"Reph"</samp> to the end of the syllable.


Finally, if the final position of <samp>"Reph"</samp> occurs after a
<samp>"_matra_,Halant"</samp> subsequence, then <samp>"Reph"</samp> must be repositioned to the
left of <samp>"Halant"</samp>, to allow for potential matching with `abvs` or
`psts` substitutions from <abbr title="Glyph Substitution table">GSUB</abbr>.

:::{figure-md}
![Reph final reordering](/images/bengali/bengali-reph-position.svg){.shaping-demo .inline-svg .greyscale-svg #bengali-reph-position}

Reph final reordering
:::

```{svg-color-toggle-button} bengali-reph-position
```

#### Stage 4, step 4: Pre-base-reordering consonants ####

Any pre-base-reordering consonants must be moved to immediately before
the base consonant or syllable base.
  
Bengali does not use pre-base-reordering consonants, so this step will
involve no work when processing `<bng2>` text. It is included here in order
to maintain compatibility with the other Indic scripts.


#### Stage 4, step 5: Initial matras ####

Any left-side dependent vowels (matras) that are at the start of a
word must be flagged for potential substitution by the `init` feature
of <abbr title="Glyph Substitution table">GSUB</abbr>.

> Note: Although the specification defines the `init` feature as being
> used for word-initial positions only, the feature's origin bases
> this on the linguistic sense of "word" and that sense may not be
> precise enough to cover all of the cases encountered in a
> contemporary text run.
>
> In practice, users may expect the `init` feature to be applied when
> a sequence has a left-side dependent vowel that is preceded by a
> punctuation character, a currency symbol, an emoji, or any of
> several other categories of code point. Shaping engines may need to
> adapt their matching rules to meet users' expectations for this
> feature. 
>
> The Microsoft Uniscribe shaping engine historically tested for a
> certain range of Unicode `General Category` values, and more recent
> shaping engines follow suit. For more information on Uniscribe
> compatibility, see the [Uniscribe-bug-compatibility
> note](/notes/uniscribe-bug-compatibility.md). 


### Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr> ###

In this stage, the remaining substitution features from the <abbr title="Glyph Substitution table">GSUB</abbr> table
are applied. In preparation for this stage, glyph sequences should be
flagged for possible application of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2,
step 10.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	init
	pres
	abvs
	blws
	psts
	haln

The `init` feature replaces word-initial glyphs with special
presentation forms. Generally, these forms involve removing the
headline in-stroke from the left side of the glyph.

The context defined for an `init` feature is:
    
:::{table} `init` feature context

| Backtrack    | Matching sequence          | Lookahead           |
|:-------------|:---------------------------|:--------------------|
| `WORD_START` | `_matra_`(`LEFT_POSITION`) | `_consonant_`(full) |

:::

:::{figure-md}
![Application of the init feature](/images/bengali/bengali-init.svg "Application of the init feature"){.shaping-demo .inline-svg .greyscale-svg #bengali-init}

Application of the `init` feature
:::

```{svg-color-toggle-button} bengali-init
```


The `pres` feature replaces pre-base-consonant glyphs with special
presentation forms. This can include consonant conjuncts, half-form
consonants, and stylistic variants of left-side dependent vowels
(matras). 

:::{figure-md}
![Application of the pres feature](/images/bengali/bengali-pres.svg "Application of the pres feature"){.shaping-demo .inline-svg .greyscale-svg #bengali-pres}

Application of the `pres` feature
:::

```{svg-color-toggle-button} bengali-pres
```


The `abvs` feature replaces above-base-consonant glyphs with special
presentation forms. This usually includes contextual variants of
above-base marks or contextually appropriate mark-and-base ligatures.

:::{figure-md}
![Application of the abvs feature](/images/bengali/bengali-abvs.svg "Application of the abvs feature"){.shaping-demo .inline-svg .greyscale-svg #bengali-abvs}

Application of the `abvs` feature
:::

```{svg-color-toggle-button} bengali-abvs
```


The `blws` feature replaces below-base-consonant glyphs with special
presentation forms. This usually includes replacing base consonants that
are adjacent to below-base-consonant forms like <samp>"Raphala"</samp> or
<samp>"Baphala"</samp> with contextual ligatures.

:::{figure-md}
![Application of the blws feature](/images/bengali/bengali-blws.svg "Application of the blws feature"){.shaping-demo .inline-svg .greyscale-svg #bengali-blws}

Application of the `blws` feature
:::

```{svg-color-toggle-button} bengali-blws
```


The `psts` feature replaces post-base-consonant glyphs with special
presentation forms. This usually includes replacing right-side
dependent vowels (matras) with stylistic variants or replacing
post-base-consonant/matra pairs with contextual ligatures.

:::{figure-md}
![Application of the psts feature](/images/bengali/bengali-psts.svg "Application of the psts feature"){.shaping-demo .inline-svg .greyscale-svg #bengali-psts}

Application of the `psts` feature
:::

```{svg-color-toggle-button} bengali-psts
```


The `haln` feature replaces syllable-final <samp>"_Consonant_,Halant"</samp> pairs with
special presentation forms. This can include stylistic variants of the
consonant where placing the <samp>"Halant"</samp> mark on its own is
typographically problematic.

:::{figure-md}
![Application of the haln feature](/images/bengali/bengali-haln.svg "Application of the haln feature"){.shaping-demo .inline-svg .greyscale-svg #bengali-haln}

Application of the `haln` feature
:::

```{svg-color-toggle-button} bengali-haln
```


> Note: The `calt` feature, which allows for generalized application
> of contextual alternate substitutions, is usually applied at this
> point. However, `calt` is not mandatory for correct Bengali shaping
> and may be disabled in the application by user preference.

### Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied.

As with the preceding stage, the order in which these features are
applied is not canonical; they should be applied in the order in which
they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

        dist
        abvm
        blwm

> Note: The `kern` feature is usually applied at this stage, if it is
> present in the font. However, `kern` (like `calt`, above) is not
> mandatory for shaping Bengali text and may be disabled by user preference.

The `dist` feature adjusts the horizontal positioning of
glyphs. Unlike `kern`, adjustments made with `dist` do not require the
application or the user to enable any software _kerning_ features, if
such features are optional. 

The `abvm` feature positions above-base marks for attachment to base
characters. In Bengali, this includes <samp>"Reph"</samp> in addition to the
diacritical marks and Vedic signs. 

:::{figure-md}
![Application of the abvm feature](/images/bengali/bengali-abvm.svg "Application of the abvm feature"){.shaping-demo .inline-svg .greyscale-svg #bengali-abvm}

Application of the `abvm` feature
:::

```{svg-color-toggle-button} bengali-abvm
```

The `blwm` feature positions below-base marks for attachment to base
characters. In Bengali, this includes below-base dependent vowels
(matras) as well as the below-base consonant forms <samp>"Raphala"</samp> and
<samp>"Baphala"</samp>.

:::{figure-md}
![Application of the blwm feature](/images/bengali/bengali-blwm.svg "Application of the blwm feature"){.shaping-demo .inline-svg .greyscale-svg #bengali-blwm}

Application of the `blwm` feature
:::

```{svg-color-toggle-button} bengali-blwm
```


## The `<beng>` shaping model ##

The older Bengali script tag, `<beng>`, has been deprecated. However,
shaping engines may still encounter fonts that were built to work with
`<beng>` and some users may still have documents that were written to
take advantage of `<beng>` shaping.

### Distinctions from `<bng2>` ###

The most significant distinction between the shaping models is that the
sequence of <samp>"Halant"</samp> and consonant glyphs used to trigger shaping
features was altered when migrating from `<beng>` to
`<bng2>`. 

Specifically, under the `<beng>` model, shaping engines were expected to
reorder post-base <samp>"Halant,_Consonant_"</samp> sequences to
<samp>"_Consonant_,Halant"</samp>.

As a result, a font's <abbr title="Glyph Substitution table">GSUB</abbr> substitutions would be written to match
<samp>"_Consonant_,Halant"</samp> sequences in all pre-base and post-base positions.


The `<beng>` syllable

	Pre-baseC Halant BaseC Halant Post-baseC

would be reordered to

	Pre-baseC Halant BaseC Post-baseC Halant

before features are applied.

In `<bng2>` text, as described above in this document, there is no
such reordering. The correct sequence to match for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions is
<samp>"_Consonant_,Halant"</samp> for pre-base consonants, but <samp>"Halant,_Consonant_"</samp>
for post-base consonants.

The old Indic shaping model also did not recognize the
`BLWF_MODE_PRE_AND_POST` shaping characteristic. Instead, `<beng>`
was treated as if it followed the `BLWF_MODE_POST_ONLY`
characteristic. In other words, below-base form substitutions were
only applied to consonants after the base consonant or syllable base.

In addition, for some scripts, left-side dependent vowel marks
(matras) were not repositioned during the final reordering
stage. For `<beng>` text, the left-side matra was always positioned
at the beginning of the syllable.


### Advice for handling fonts with `<beng>` features only ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences in order to apply <abbr title="Glyph Substitution table">GSUB</abbr> substitutions when it is known that
the font in use supports only the `<beng>` shaping model.

### Advice for handling text runs composed in `<beng>` format ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions or to reorder them to
<samp>"Halant,_Consonant_"</samp> when processing text runs that are tagged with
the `<beng>` script tag and it is known that the font in use supports
only the `<bng2>` shaping model.

Shaping engines may also choose to apply `blwf` substitutions to
below-base consonants occurring before the base consonant or syllable base when it is
known that the font in use supports an applicable substitution lookup.

Shaping engines may also choose to position left-side matras according
to the `<beng>` ordering scheme; however, doing so might interfere
with matching <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features.


================================================
FILE: opentype-shaping-default.md
================================================
# Default script shaping in OpenType #

This document details the default shaping procedure needed to display
text runs in non-complex scripts. It may also be used as a fallback
model for unrecognized scripts.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Normalization](#normalization)
  - [The default shaping model](#the-default-shaping-model)
      - [Stage 1: Applying the basic substitution features from <abbr>GSUB</abbr>](#stage-1-applying-the-basic-substitution-features-from-gsub)
	  - [Stage 2: Applying typographic substitution features from <abbr>GSUB</abbr>](#stage-2-applying-typographic-substitution-features-from-gsub)
	  - [Stage 3: Applying the positioning features from <abbr>GPOS</abbr>](#stage-3-applying-the-positioning-features-from-gpos)
  
  
## General information ##

The default OpenType shaping model is used for scripts that are
considered _non-complex_ from the shaper's perspective. This
designation means that shaping a text run does not involve glyph
reordering, contextual joining behavior, or the substitution of
context-dependent forms for linguistic or orthographic correctness.

Text runs in non-complex scripts may, however, involve ligature
substitution, Unicode normalization, mark positioning, kerning, and
the application of other features from the active font's <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr>
tables.

The non-complex scripts covered by this model include Latin, Cyrillic,
Greek, Armenian, Georgian, Ethiopic, Cherokee, Tifinagh, and many others.


## Terminology ##

Many of these scripts support diacritics and other **marks**. Unicode may
contain **precomposed** mark-and-base codepoints for some or all
combinations of marks and base letters in the script. For combinations
without a codepoint, the desired form can be achieved by following the
**base** letter with a **combining mark** codepoint. 

The primary concern for the shaping engine is processing the text run into
the correct normalized form, so that the best glyphs from the active
font can be selected from among the available precomposed and
combining alternatives.

Fonts for non-complex scripts might not include a <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> table
at all. 

However, <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> may also be used to implement a variety of
OpenType smart features, including several classes of ligature,
contextual alternate, or contextual positioning rules. Because these
features are not required in order to render the text run
orthographically correct, the features are not considered shaping
features. Nevertheless, the shaping engine may be expected to apply
these features in order to simplify the overall text-rendering
architecture of the implementation.

## Normalization ##

Unicode defines algorithms for normalizing a sequence of input
codepoints into either a canonical composed form or a canonical
decomposed form. The purpose of these algorithms and of the defined
normalization forms is to determine equivalent representations of input
sequences regardless of variations in the input sequences.

For example, a base letter with an attached mark might exist in
Unicode as a single codepoint, but an input sequence might consist of
the base letter codepoint followed by the combining mark
codepoint. Unicode normalization can be used to determine that the
<samp>"letter, mark"</samp> sequence is equivalent to the single codepoint. This
simplifies sorting, searching, string comparison, and many other common
tasks.

OpenType shaping utilizes Unicode normalization, but OpenType
shaping has a distinctly different goal: to select the best or most
appropriate representation of the input codepoint sequence that is
available in the active font. A full description of the algorithm is
available in the [normalization](opentype-shaping-normalization.md) document. 

Shaping some complex scripts involves explicit composition or
decomposition steps. The default shaping model does not involve any
such steps, but it does proceed with the general assumption that text
runs have been normalized as part of input sanitization. 

For convenience, shaping engines may choose to implement a single
normalization routine for all scripts, default and complex. If
normalization is done before the shaping-model–specific processing is
done, then there may be no work required in certain shaping steps
(such as the processing of `ccmp` substitutions from <abbr title="Glyph Substitution table">GSUB</abbr>). However,
these steps will always be described in the relevant script's shaping
document. 


## The default shaping model ##

Processing a run of text in the default shaping model involves three
top-level stages:

1. Applying the basic substitution features from <abbr>GSUB</abbr>
2. Applying typographic substitution features from <abbr>GSUB</abbr>
3. Applying the positioning features from <abbr>GPOS</abbr>

Together, these stages cover the application of all <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr>
features that are required or that have been defined by OpenType as
being on by default.

For convenience, shaping engines may also choose to apply any optional
or off-by-default OpenType features that have been activated for the
text run (including those that have been
enabled by the user and those that have been enabled at the
application level). However, the order in which such features should
be applied and how they should interact with OpenType shaping features
is beyond the scope of this document.

The default shaping model does not involve syllable-identification,
word-identification, or other preprocessing of the input
sequence. Shaping engines may choose how to segment longer text runs
for processing, or may choose to rely on higher-level applications to
make segmentation decisions.


### Stage 1: Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

These substitutions include those features designed to provide
linguistic and orthographic correctness.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	locl
	ccmp
	rlig
	
The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

The `ccmp` feature allows a font to substitute mark-and-base sequences
with a pre-composed glyph including the mark and the base, or to
substitute a single glyph into an equivalent decomposed sequence of
glyphs. 

If present, these composition and decomposition substitutions must be
performed before applying any other <abbr title="Glyph Substitution table">GSUB</abbr> lookups, because
those lookups may be written to match only the `ccmp`-substituted
glyphs.

> Note: The `ccmp` feature may perform compositions or decompositions
> of glyph sequences that do not have a canonical decomposition
> defined in Unicode. 

The `rlig` feature substitutes glyph sequences with mandatory
ligatures. Substitutions made by `rlig` cannot be disabled by
application-level user interfaces.


### Stage 2: Applying typographic substitution features from <abbr>GSUB</abbr> ###

The typographic-substitution stage applies all remaining substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for
this stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

These substitutions include those features designed to provide
typographic consistency and correctness.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.


	rclt
	calt
	clig
	liga
	

The `rclt` feature substitutes glyphs with contextual alternate
forms. In general, the `rclt` feature is used to perform such
substitutions that are required by the orthography of the active
script and language. Substitutions made by `rclt` cannot be disabled
by application-level user interfaces.

The `calt` feature substitutes glyphs with contextual alternate
forms. In general, the `calt` feature performs substitutions that are
not mandatory for orthographic correctness. Consequently, unlike `rclt`,
the substitutions made by `calt` can be disabled by application-level
user interfaces.

The `clig` feature substitutes optional ligatures that are on by
default, but which are activated only in certain
contexts. Substitutions made by `clig` may be disabled by
application-level user interfaces. 

The `liga` feature substitutes standard, optional ligatures that are on
by default. Substitutions made by `liga` may be disabled by
application-level user interfaces.


### Stage 3: Applying the positioning features from <abbr>GPOS</abbr> ###

The positioning stage adjusts the positions of mark and base
glyphs. In preparation for this stage, glyph sequences should be
tagged for possible application of <abbr title="Glyph Positioning table">GPOS</abbr> features.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table
in the font.


	curs
	dist
	kern
	mark
	mkmk

The `curs` feature performs cursive positioning. Each glyph has an
entry point and exit point; the `curs` feature positions glyphs so
that the entry point of the current glyph meets the exit point of the
preceding glyph.

The `dist` feature adjusts the horizontal positioning of
glyphs. Unlike `kern`, adjustments made with `dist` do not require the
application or the user to enable any software kerning features, if
such features are optional.

The `kern` feature adjusts glyph spacing between pairs of adjacent glyphs.

The `mark` feature positions marks with respect to base glyphs.

The `mkmk` feature positions marks with respect to preceding marks,
providing proper positioning for sequences of marks that attach to the
same base glyph.

<!---
collect features
override features
data create
data destroy
preprocess text
postprocess glyphs
normalization mode default
decompose
compose
setup masks
disable otl
reorder marks
zero width marks by gdef late
fallback position
--->


================================================
FILE: opentype-shaping-devanagari.md
================================================
```{include} /_global.md
```

# Devanagari shaping in OpenType #

This document details the shaping procedure needed to display text
runs in the Devanagari script.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes and subclasses](#shaping-classes-and-subclasses)
      - [Devanagari character tables](#devanagari-character-tables)
  - [The `<dev2>` shaping model](#the-dev2-shaping-model)
      - [Stage 1: Identifying syllables and other sequences](#stage-1-identifying-syllables-and-other-sequences)
      - [Stage 2: Initial reordering](#stage-2-initial-reordering)
      - [Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr>](#stage-3-applying-the-basic-substitution-features-from-gsub)
      - [Stage 4: Final reordering](#stage-4-final-reordering)
      - [Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr>](#stage-5-applying-all-remaining-substitution-features-from-gsub)
      - [Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr>](#stage-6-applying-remaining-positioning-features-from-gpos)
  - [The `<deva>` shaping model](#the-deva-shaping-model)
      - [Distinctions from `<dev2>`](#distinctions-from-dev2)
      - [Advice for handling fonts with `<deva>` features only](#advice-for-handling-fonts-with-deva-features-only)
      - [Advice for handling text runs composed in `<deva>` format](#advice-for-handling-text-runs-composed-in-deva-format)


## General information ##

The Devanagari script belongs to the Indic family, and follows
the same general patterns as the other Indic scripts. More
specifically, it belongs to the North Indic subgroup, in which
sequences of adjacent consonants are often represented as conjuncts.

The Devanagari script is used to write multiple languages, most commonly
Hindi, Marathi, Maithili, and Nepali. In addition, Sanskrit may be written
in Devanagari, so Devanagari script runs may include glyphs from the Vedic
Extensions block of Unicode. 

There are two extant Devanagari script tags defined in OpenType, `<deva>`
and `<dev2>`. The older script tag, `<deva>`, was deprecated in 2005.
Therefore, new fonts should be engineered to work with the `<dev2>`
shaping model. However, if a font is encountered that supports only
`<deva>`, the shaping engine should deal with it gracefully.

## Terminology ##

OpenType shaping uses a standard set of terms for Indic scripts.  The
terms used colloquially in any particular language may vary, however,
potentially causing confusion.

**Matra** is the standard term for a dependent vowel sign. 

The term "matra" is also used to refer to the headline above most
Devanagari letters. To avoid ambiguity, the term **headline** is
used in most Unicode and OpenType shaping documents.

**Halant** and **Virama** are both standard terms for the below-base "vowel-killer"
sign. Unicode documents use the term "virama" most frequently, while
OpenType documents use the term "halant" most frequently. 

**Chandrabindu** (or simply **Bindu**) is the standard term for the diacritical mark
indicating that the preceding vowel should be nasalized. 

The term **base consonant** is also critical to Indic shaping. The
base consonant of a syllable is the consonant that carries the
syllable's vowel sound, either the inherent vowel (for an unmarked
base consonant) or a dependent vowel (with the addition of a matra).

A syllable's base consonant is generally rendered in its full form
(although it may form ligatures), while other consonants in the
syllable frequently take on secondary forms. Different <abbr title="Glyph Substitution table">GSUB</abbr>
substitutions may apply to a script's **pre-base** and **post-base**
consonants. Some of these substitutions create **above-base** or
**below-base** forms. The **Reph** form of the consonant "Ra" is an
example.

Syllables may also begin with an **independent vowel** instead of a
consonant. In these syllables, the independent vowel is rendered in
full-letter form, not as a matra, and the independent vowel serves as the
syllable base, similar to a base consonant.

Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.

## Glyph classification ##

Shaping Devanagari text depends on the shaping engine correctly
classifying each glyph in the run. As with most other scripts, the
classifications must distinguish between consonants, vowels
(independent and dependent), numerals, punctuation, and various types
of diacritical mark.

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not sufficient to fully capture the
expected shaping behavior (such as glyph reordering). Therefore,
Devanagari glyphs must additionally be classified by how they are treated
when shaping a run of text.

### Shaping classes and subclasses ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Indic scripts. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Several of the diacritic and syllable-modifying marks behave according
to their own rules and, thus, have a special class. These include
`BINDU`, `VISARGA`, `AVAGRAHA`, `NUKTA`, and `VIRAMA`. Some
less-common marks behave according to rules that are similar to these
common marks, and are therefore classified with the corresponding
common mark. The Vedic Extensions also include a `CANTILLATION`
class for tone marks.

Letters generally fall into the classes `CONSONANT`,
`VOWEL_INDEPENDENT`, and `VOWEL_DEPENDENT`. These classes help the
shaping engine parse and identify key positions in a syllable. For
example, Unicode categorizes dependent vowels as `Mark [Mn]`, but the
shaping engine must be able to distinguish between dependent vowels
and diacritical marks (which are also categorized as `Mark [Mn]`).

Other characters, such as symbols and miscellaneous letters (for
example, letter-like symbols that only occur as standalone entities
and do not occur within syllables), need no special attention from the
shaping engine, so they are not assigned a shaping class.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType features that
might affect how the respective glyphs are drawn, such as `tnum`,
which specifies the usage of tabular-width numerals, and `sups`, which
replaces the default glyphs with superscript variants.

Marks and dependent vowels are further labeled with a mark-placement
subclass, which indicates where the glyph will be placed with respect
to the base character to which it is attached. The actual position of
the glyphs is determined by the lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr>
table; however, the shaping rules for Indic scripts require that the
shaping engine be able to identify marks by their general
position. 

For example, left-side dependent vowels (matras), classified
with `LEFT_POSITION`, must frequently be reordered, with the final
position determined by whether or not other letters in the syllable
have formed ligatures or combined into conjunct forms. Therefore, the
`LEFT_POSITION` subclass of the character must be tracked throughout
the shaping process.

There are four basic _mark-placement subclasses_ for dependent vowels
(matras). Each corresponds to the visual position of the matra with
respect to the syllable base to which it is attached:

  - `LEFT_POSITION` matras are positioned to the left of the syllable base.
  - `RIGHT_POSITION` matras are positioned to the right of the syllable base.
  - `TOP_POSITION` matras are positioned above the syllable base.
  - `BOTTOM_POSITION` matras are positioned below the syllable base.
  
These positions may also be referred to elsewhere in shaping documents as:

  - _Pre-base_ matras
  - _Post-base_ matras
  - _Above-base_ matras
  - _Below-base_ matras
  
respectively. The `LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

In addition, dependent-vowel codepoints that are composed of multiple
components will be designated in character tables as having a compound
_mark-placement subclass_, such as `TOP_AND_RIGHT` or `LEFT_AND_RIGHT`. 

However, these multi-part matras are decomposed into separate matra
components during the shaping process. After the decomposition, each
matra component will belong to exactly one of the four basic
_mark-placement subclasses_.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there are some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 

### Devanagari character tables ###

Separate character tables are provided for the Devanagari, Devanagari
Extended, and Vedic Extensions blocks as well as for other
miscellaneous characters that are used in `<dev2>` text runs:

  - [Devanagari character table](character-tables/character-tables-devanagari.md#devanagari-character-table)
  - [Devanagari Extended character table](character-tables/character-tables-devanagari.md#devanagari-extended-character-table)
  - [Vedic Extensions character table](character-tables/character-tables-devanagari.md#vedic-extensions-character-table)
  - [Miscellaneous character table](character-tables/character-tables-devanagari.md#miscellaneous-character-table)

The tables list each codepoint along with its Unicode general
category, its shaping class, and its mark-placement subclass. The
codepoint's Unicode name and an example glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0901`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0901; Candrabindu         |
| | | | |
|`U+0915`   | Letter           | CONSONANT         | _null_                     | &#x0915; Ka                  |
:::


Codepoints with no assigned meaning are designated as _unassigned_ in
the _Unicode category_ column.

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the tables use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of Devanagari text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (such as <samp>"Reph"</samp>)


The zero-width joiner (<abbr title="Zero-Width Joiner">ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a <samp>"_Consonant_,Halant,_Consonant_"</samp> sequence. 

  - The sequence <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> blocks the
    formation of a conjunct between the two consonants. 

Note, however, that the <samp>"_Consonant_,Halant"</samp> subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead.

  - The sequence <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> should produce
    the first consonant in its standard form, followed by an explicit
    <samp>"Halant"</samp>. 

A secondary usage of the zero-width joiner is to prevent the formation of
<samp>"Reph"</samp>.

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence should not produce a <samp>"Reph"</samp>,
    even where an initial <samp>"Ra,Halant"</samp> sequence without the zero-width
    joiner would otherwise produce a <samp>"Reph"</samp>.

The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.

For example:

  - A lookup that substitutes an alternate version of a
    dependent-vowel (matra) glyph when it is preceded by <samp>"Ka,Halant,Tta"</samp>
    should still be applied if the dependent-vowel codepoint is preceded
    by <samp>"Ka,Halant,ZWJ,Tta"</samp> in the text run.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match <samp>"NBSP,ZWJ,Halant,_Consonant_"</samp>, <samp>"NBSP,_mark_"</samp>, or <samp>"NBSP,_matra_"</samp>.


## The `<dev2>` shaping model ##

Processing a run of `<dev2>` text involves six top-level stages:

1. Identifying syllables and other sequences
2. Initial reordering
3. Applying the basic substitution features from <abbr>GSUB</abbr>
4. Final reordering
5. Applying all remaining substitution features from <abbr>GSUB</abbr>
6. Applying all remaining positioning features from <abbr>GPOS</abbr>


As with other Indic scripts, the initial reordering stage and the
final reordering stage each involve applying a set of several
script-specific rules. The basic substitution features must be applied
to the run in a specific order. The remaining substitution features in
stage five, however, do not have a mandatory order.

Indic scripts follow many of the same shaping patterns, but they
differ in a few critical characteristics that the shaping engine must
track. These include:

  - The position of the base consonant in a syllable.
  
  - The final position of <samp>"Reph"</samp>.
  
  - Whether <samp>"Reph"</samp> must be requested explicitly or if it is formed by
    a specific, implicit sequence.
	
  - Whether the below-base forms feature is applied only to consonants
    before the syllable base, only to consonants after the base
    consonant, or to both.
	
  - The ordering positions for dependent vowels
    (matras). Specifically, right-side, above-base, and below-base
    matras follow different rules in different scripts. 
	All Indic scripts position left-side matras in the same
    manner, in the ordering position `POS_PREBASE_MATRA`. 

With regard to these common variations, Devanagari's specific shaping
characteristics include: 

  - `BASE_POS_LAST` = The base consonant of a syllable is the last
     consonant, not counting any special final-consonant forms.

  - `REPH_POS_BEFORE_POST` = <samp>"Reph"</samp> is ordered before all post-base consonant forms.

  - `REPH_MODE_IMPLICIT` = <samp>"Reph"</samp> is formed by an initial <samp>"Ra,Halant"</samp> sequence.

  - `BLWF_MODE_PRE_AND_POST` = The below-base forms feature is applied both to
     pre-base consonants and to post-base consonants.

  - `MATRA_POS_TOP` = `POS_AFTER_SUBJOINED` = Above-base matras are
     ordered after subjoined (i.e., below-base) consonant forms. 

  - `MATRA_POS_RIGHT` = `POS_AFTER_SUBJOINED` = Right-side matras are
     ordered after subjoined (i.e., below-base) consonant forms. 

  - `MATRA_POS_BOTTOM` = `POS_AFTER_SUBJOINED` = Below-base matras are
     ordered after all subjoined (i.e., below-base) consonant forms.

These characteristics determine how the shaping engine must reorder
certain glyphs, how base consonants are determined, and how <samp>"Reph"</samp>
should be encoded within a run of text.

### Stage 1: Identifying syllables and other sequences ###

A syllable in Devanagari consists of a valid orthographic sequence
that may be followed by a "tail" of modifier signs. 

> Note: The Devanagari Unicode block enumerates nine modifier signs,
> "Inverted Candrabindu" (`U+0900`), "Candrabindu" (`U+0901`),
> "Anusvara" (`U+0902`), "Visarga" (`U+0903`), "Avagraha" (`U+093D`),
> "Udatta" (`U+0951`), "Anudatta" (`U+0952`), "Grave Accent"
> (`U+0953`) and "Acute Accent" (`U+0954`). In addition, Sanskrit text
> written in Devanagari may include additional signs from the Vedic
> Extensions block. 

Each syllable contains exactly one vowel sound. Valid syllables may
begin with either a consonant or an independent vowel. 

If the syllable begins with a consonant, then the consonant that
provides the vowel sound is referred to as the "base" consonant. If
the syllable begins with an independent vowel, that independent vowel
is the syllable's only vowel sound and serves as the "base". 

> Note: A consonant that is not accompanied by a dependent vowel (matra) sign
> carries the script's inherent vowel sound. This vowel sound is changed
> by a dependent vowel (matra) sign following the consonant.

From the shaping engine's perspective, the main distinction between a
syllable with a base consonant and a syllable with an
independent-vowel base is that a syllable with an independent-vowel
base is less likely to include additional consonants in special forms
and less likely to include dependent vowel signs
(matras). Therefore, in the common case, vowel-based syllables may
involve less reordering, substitution feature applications, and other
processing than consonant-based syllables.

In some languages and orthographies, vowel-based syllables are
not permitted to include additional consonants or matras, and certain
<abbr title="Glyph Substitution table">GSUB</abbr> substitution features do not occur. However, there are often
known exceptions, and real-world text makes no such guarantees. 

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

Generally speaking, the base consonant is the final consonant of the
syllable and its vowel sound designates the end of the syllable. This
rule is synonymous with the `BASE_POS_LAST` characteristic mentioned
earlier. 

Valid consonant-based syllables may include one or more additional 
consonants that precede the base consonant. Each of these
other, pre-base consonants will be followed by the <samp>"Halant"</samp> mark, which
indicates that they carry no vowel. They affect pronunciation by
combining with the base consonant (e.g., "_str_", "_pl_") but they
do not add a vowel sound.

As with other Indic scripts, the consonant <samp>"Ra"</samp> receives special
treatment; in many circumstances it is replaced by one of two combining
mark-like forms. 

  - A <samp>"Ra,Halant"</samp> sequence at the beginning of a syllable
    is replaced with an above-base mark called <samp>"Reph"</samp> (unless the <samp>"Ra"</samp>
    is the only consonant in the syllable). 
    This rule is synonymous with the `REPH_MODE_IMPLICIT`
    characteristic mentioned earlier.

  - <samp>"Halant,Ra"</samp> sequences that occur elsewhere in the syllable may take on the
    below-base form <samp>"Rakaar"</samp>.
	
	
In addition, <samp>"Rra,Halant"</samp> sequences that precede the base consonant or syllable base
may take on a form known as the <samp>"eyelash Ra"</samp>.

> Note: In `<dev2>` text runs, this substitution is canonically
> implemented as a [half form](#stage-3-step-9-half). See the [`<deva>`
> shaping](#the-deva-shaping-model) section for a discussion of the
> "eyelash Ra" implementation that was used in the `<deva>` model.

<samp>"Reph"</samp> and <samp>"Rakaar"</samp> characters must be reordered after the
syllable-identification stage is complete. 

> Note: Generally speaking, OpenType fonts will implement support for
> any below-base, post-base, and pre-base-reordering consonant forms
> by including the necessary substitution rules in their `blwf`,
> `pstf`, and `pref` lookups in <abbr title="Glyph Substitution table">GSUB</abbr>.
>
> Consequently, whenever shaping engines need to determine whether or 
> not a given consonant can take on such a special form, the most
> appropriate test is to check if the consonant is included in the
> relevant <abbr title="Glyph Substitution table">GSUB</abbr> lookup. Other implementations are possible, such as
> maintaining static tables of consonants, but checking for <abbr title="Glyph Substitution table">GSUB</abbr>
> support ensures that the expected behavior is implemented in the
> active font, and is therefore the most reliable approach.


In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.

> Note: Foreign loanwords, when written in the Devanagari script, may
> not adhere to the syllable-formation rules described above. In
> particular, it is not uncommon to encounter foreign loanwords that
> contain a word-final suffix of consonants.
>
> Nevertheless, such word-final suffixes will be correctly matched by
> the regular expressions listed below. These loanwords are pronounced
> differently, which raises issues for potential readers, but the
> character sequences do not affect the shaping process.


Syllables should be identified by examining the run and matching
glyphs, based on their categorization, using regular expressions. 

The following general-purpose Indic-shaping regular expressions can be
used to match Devanagari syllables. 

The regular expressions utilize the shaping classes from the tables
above. For the purpose of syllable identification, more general
classes can be used, as defined in the following table. This
simplifies the resulting expressions. 

```markdown
_ra_		= The consonant "Ra" 
_consonant_	= ( `CONSONANT` | `CONSONANT_DEAD` ) - _ra_
_vowel_		= `VOWEL_INDEPENDENT`
_nukta_	  	= `NUKTA`
_halant_	= `VIRAMA`
_zwj_		= `JOINER`
_zwnj_		= `NON_JOINER`
_matra_		= `VOWEL_DEPENDENT` | `PURE_KILLER`
_syllablemodifier_	= `SYLLABLE_MODIFIER` | `BINDU` | `VISARGA` | `GEMINATION_MARK`
_vedicsign_	= `CANTILLATION`
_placeholder_	= `PLACEHOLDER` | `CONSONANT_PLACEHOLDER` | `NUMBER`
_dottedcircle_	= `DOTTED_CIRCLE`
_repha_		= `CONSONANT_PRE_REPHA`
_consonantmedial_	= `CONSONANT_MEDIAL`
_symbol_	= `SYMBOL` | `AVAGRAHA`
_consonantwithstacker_	= `CONSONANT_WITH_STACKER`
_other_		= `OTHER` | `MODIFYING_LETTER`
```


> Note: the _ra_ identification class is mutually exclusive with 
> the _consonant_ class. The union of the _consonant_ and _ra_ classes
> is used in the regular expression elements below in order to
> correctly identify <samp>"Ra"</samp> characters that do not trigger <samp>"Reph"</samp> or
> <samp>"Rakaar"</samp> shaping behavior.
>
> Note, also, that the cantillation mark "combining Ra" in the
> Devanagari Extended block does _not_ belong to the _ra_
> identification class, and that the other "combining consonant"
> cantillation marks in the Devanagari Extended block do not belong to
> the _consonant_ identification class.

> Note: The _placeholder_ identification class includes codepoints
> that are often used in place of vowels or consonants when a document
> needs to display a matra, mark, or special form in isolation or
> in another context beyond a standard syllable. Examples of
> _placeholder_ codepoints include hyphens and non-breaking
> spaces. Sequences that utilize this approach should be identified as
> "standalone" syllables.
>
> The _placeholder_ identification class also includes numerals, which
> are commonly used as word substitutes within normal text. Examples
> include ordinals (e.g., "4th").

> Note: The _other_ identification class includes codepoints that
> do not interact with adjacent characters for shaping purposes. Even
> though some of these codepoints (such as `MODIFYING_LETTER`) can
> occur within words, they evoke no behavior from the shaping
> engine and do not factor into the regular expressions that
> follow. Therefore, the shaping engine may choose to ignore them
> during syllable identification; they are listed here for completeness.

These identification classes form the bases of the following regular
expression elements:

```markdown
C	= _consonant_ | _ra_
Z	= _zwj_ | _zwnj_
REPH	= (_ra_ _halant_) | _repha_
CN		= C _zwj_? _nukta_?
FORCED_RAKAR	= _zwj_ _halant_ _zwj_ _ra_
S	= _symbol_ _nukta_?
MATRA_GROUP	= Z{0,3} _matra_ _nukta_? (_halant_ | FORCED_RAKAR)?
SYLLABLE_TAIL	= (Z? _syllablemodifier_ _syllablemodifier_? _zwnj_?)? _vedicsign_{0,3}
HALANT_GROUP	= Z? _halant_ (_zwj_ _nukta_?)?
FINAL_HALANT_GROUP	= HALANT_GROUP | (_halant_ _zwnj_)
MEDIAL_GROUP	= _consonantmedial_?
HALANT_OR_MATRA_GROUP	= (FINAL_HALANT_GROUP | MATRA_GROUP*)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(MATRA_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(MATRA_GROUP){0,4}` .


Using the above elements, the following regular expressions define the
possible syllable types:

A consonant-based syllable will match the expression:
```markdown
(_repha_|_consonantwithstacker_)? (CN HALANT_GROUP)* CN MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(CN HALANT_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(CN HALANT_GROUP){0,4}` .

A vowel-based syllable will match the expression:
```markdown
REPH? _vowel_ _nukta_? (_zwj_ | (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

A standalone syllable will match the expression:
```markdown
((_repha_|_consonantwithstacker_)? _placeholder_ | REPH? _dottedcircle_) _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

> Note: Although they are labeled as "standalone syllables" here,
> many sequences that match the standalone regular expression above
> are instances where a document needs to display a matra, combining
> mark, or special form in isolation. Such sequences might not have
> any significance with regard to the definition of syllables used in
> the language or orthography of the text.

A symbol-based syllable will match the expression:
```markdown
S SYLLABLE_TAIL
```

A broken syllable will match the expression:
```markdown
REPH? _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

The primary problem involved in shaping broken syllables is the lack
of a syllable base (either a base consonant or an independent
vowel). Without a syllable base, the shaping engine cannot perform
<abbr title="Glyph Positioning table">GPOS</abbr> positioning and other contextual operations that are required
later in the shaping process.

To make up for this limitation, shaping engines should insert a
dotted-circle placeholder (`U+25CC`) character into the text stream
where the missing syllable base was expected to occur. This
placeholder allows the shaping process to proceed on a best-effort
basis at handling the broken-syllable sequence, but making guarantees
about the orthographic correctness or preferred appearance of the
final result is out of scope for this document.

Shaping engines can perform this dotted-circle insertion at any point
after the broken syllable has been recognized and before <abbr title="Glyph Substitution table">GSUB</abbr> features
are applied. However, the best results will likely be attained by
performing the insertion immediately, before proceeding to
stage 2. This will enable the maximum number of <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features
in the active font to be correctly applied to the text run by ensuring
that all reordering, tagging, and sorting algorithms are executed as
usual.

> Note: In software stacks where other text-handling operations, such
> as Unicode normalization and localization, are performed before the
> text run is passed to the shaping engine, there is a potential for
> the dotted-circle insertion to cause unexpected effects.
>
> For example, if a `ccmp` or `locl` feature substitutes the default
> dotted-circle placeholder glyph with a variant glyph of a different
> size or weight for the (`U+25CC`) codepoint, then any shaping engine
> which relies on another software component to handle that
> functionality must take additional care to ensure consistency.


The expressions above use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


After the syllables have been identified, each of the subsequent 
shaping stages occurs on a per-syllable basis.

### Stage 2: Initial reordering ###

The initial reordering stage is used to relocate glyphs from the
phonetic order in which they occur in a run of text to the
orthographic order in which they are presented visually.

> Note: Primarily, this means moving dependent-vowel (matra) glyphs, 
> <samp>"Ra,Halant"</samp> glyph sequences, and other consonants that take special
> treatment in some circumstances. <samp>"Ra"</samp> and <samp>"Rra"</samp> may
> take on special forms, depending on their position in the syllable.
>
> These reordering moves are mandatory. The final-reordering stage
> may make additional moves, depending on the text and on the features
> implemented in the active font.

The syllable should be processed by tagging each glyph with its
intended position based on its ordering category. After all glyphs
have been tagged, the entire syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.

The final sort order of the ordering categories should be:


	POS_RA_TO_BECOME_REPH
	POS_PREBASE_MATRA
	POS_PREBASE_CONSONANT

	POS_SYLLABLE_BASE
	POS_AFTER_MAIN

	POS_ABOVEBASE_CONSONANT

	POS_BEFORE_SUBJOINED
	POS_BELOWBASE_CONSONANT
	POS_AFTER_SUBJOINED

	POS_BEFORE_POST
	POS_POSTBASE_CONSONANT
	POS_AFTER_POST

	POS_FINAL_CONSONANT
	POS_SMVD


This sort order enumerates all of the possible final positions to
which a codepoint might be reordered, across all of the Indic
scripts. It includes some ordering categories not utilized in
Devanagari. 

The basic positions (left to right) are <samp>"Reph"</samp>
(`POS_RA_TO_BECOME_REPH`), dependent vowels (matras) and consonants
positioned before the base consonant or syllable base
(`POS_PREBASE_MATRA` and `POS_PREBASE_CONSONANT`), the base consonant
or syllable base (`POS_SYLLABLE_BASE`), above-base consonants
(`POS_ABOVEBASE_CONSONANT`), below-base consonants
(`POS_BELOWBASE_CONSONANT`), consonants positioned after the base
consonant or syllable base (`POS_POSTBASE_CONSONANT`), syllable-final
consonants (`POS_FINAL_CONSONANT`), and syllable-modifying or Vedic
signs (`POS_SMVD`).

In addition, several secondary positions are defined to handle various
reordering rules that deal with relative, rather than absolute,
positioning. `POS_AFTER_MAIN` means that a character must be
positioned immediately after the syllable base. `POS_BEFORE_SUBJOINED`
and `POS_AFTER_SUBJOINED` mean that a character must be positioned
before or after any below-base consonants, respectively. Similarly,
`POS_BEFORE_POST` and `POS_AFTER_POST` mean that a character must be
positioned before or after any post-base consonants, respectively. 

For shaping-engine implementers, the names used for the ordering
categories matter only in that they are unambiguous. 

For a definition of the "base" consonant, refer to step 2.1, which
follows.

#### Stage 2, step 1: Base consonant ####

The first step is to determine the base consonant of the syllable, if
there is one, and tag it as `POS_SYLLABLE_BASE`.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base, and it should be tagged
as `POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.2.

In a standalone sequence or other syllable that begins with a placeholder
or dotted circle, the placeholder or dotted circle will always serve
as the syllable base, and it should be tagged as
`POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.2.

In a syllable that begins with a consonant, the shaping engine must
determine the base consonant by a script-specific algorithm.

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

The base consonant is defined as the consonant in a consonant-based
syllable that carries the syllable's vowel sound. That vowel sound
will either be provided by the script's inherent vowel (in which case
it is not written with a separate character) or the sound will be designated
by the addition of a dependent-vowel (matra) sign.


<!--- > Because vowel-based syllables will not include consonants and
> because independent vowels do not take on special forms or require
> reordering, many of the steps that follow will involve no
> work for a vowel-based syllable. However, vowel-based syllables must
> still be sorted and their marks handled correctly, and <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr>
> lookups must be applied. These steps of the shaping process follow
> the same rules that are employed for consonant-based syllables.
--->

While performing the base-consonant search, shaping engines may
also encounter special-form consonants, including below-base
consonants and post-base consonants. Each of these special-form
consonants must also be tagged (`POS_BELOWBASE_CONSONANT`,
`POS_POSTBASE_CONSONANT`, respectively). 

Any pre-base-reordering consonant (such as a pre-base-reordering <samp>"Ra"</samp>)
encountered during the base-consonant search must be tagged
`POS_POSTBASE_CONSONANT`. 
 
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.


The algorithm for determining the base consonant is:

  - If the syllable starts with <samp>"Ra,Halant"</samp> and the syllable contains
    more than one consonant, exclude the starting <samp>"Ra"</samp> from the list of
    consonants to be considered. 
  - Starting from the end of the syllable, move backwards until a consonant is found.
      * If the consonant is the first consonant, stop.
      * If the consonant is preceded by the sequence <samp>"Halant,ZWJ"</samp>, stop.
      * If the consonant has a below-base form, tag it as
        `POS_BELOWBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant has a post-base form, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant is a pre-base-reordering <samp>"Ra"</samp>, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If none of the above conditions is true, stop.
  - The consonant stopped at will be the base consonant.

> Note: The algorithm is designed to work for all Indic
> scripts. However, Devanagari does not utilize pre-base-reordering <samp>"Ra"</samp>.

Devanagari includes one below-base consonant form:

  - <samp>"Halant,Ra"</samp> (occurring after the syllable base) and <samp>"Ra,Halant"</samp>
    (before the syllable base, but in a non-syllable-initial
    position) will take on the <samp>"Rakaar"</samp> form. 
	
> Note: The sequence <samp>"Rra,Halant"</samp> (occurring before the base
> consonant) will take on the <samp>"eyelash Ra"</samp> special form. However, this
> special form is not a below-base form. Instead, it is canonically
> defined as belonging to the half-form substitutions, so it is
> addressed by the `half` feature in stage 3, step 9, and is not
> addressed in this step.

> Note: Because Devanagari employs the `BLWF_MODE_PRE_AND_POST` shaping
> characteristic, consonants with below-base special forms may occur
> before or after the syllable base. 
> 
> During the base-consonant search, only the <samp>"Halant,_consonant_"</samp> 
> pattern following the syllable base for these below-base forms will
> be encountered. Step 2.5 below ensures that the <samp>"_consonant_,Halant"</samp>
> pattern preceding the syllable base for these below-base forms will
> also be tagged correctly.


#### Stage 2, step 2: Matra decomposition ####

Second, any two-part dependent vowels (matras) must be decomposed
into their left-side and right-side components. 

Devanagari does not have any two-part dependent vowels; this step is
listed here because it is part of the general processing scheme for
shaping Indic scripts.

Because this decomposition is a character-level operation, the shaping
engine may choose to perform it earlier, such as during an initial
Unicode-normalization stage. However, all such decompositions must be
completed before the shaping engine begins step three, below.

#### Stage 2, step 3: Tag matras ####

Third, all left-side dependent-vowel (matra) signs must be tagged to be
moved to the beginning of the syllable, with `POS_PREBASE_MATRA`.

Above-base, right-side, and below-base dependent-vowel (matra) signs
must be tagged with `POS_AFTER_SUBJOINED`.

#### Stage 2, step 4: Adjacent marks ####

Fourth, any subsequences of marks that include a <samp>"Nukta"</samp> and a
<samp>"Halant"</samp> or Vedic sign must be reordered so that the <samp>"Nukta"</samp> appears
first.

This means that the subsequence <samp>"Halant,Nukta"</samp> is reordered to
<samp>"Nukta,Halant"</samp> and that the subsequence <samp>"_Vedic_sign_,Nukta"</samp> is
reordered to <samp>"Nukta,_Vedic_sign_"</samp>.

For subsequences of affected marks that are longer than two, the
reordering operation must be repeated until the <samp>"Nukta"</samp> is the first
character in the subsequence. No other marks in the subsequence
should be reordered.

This order is canonical in Unicode and is required so that
<samp>"_consonant_,Nukta"</samp> substitution rules from <abbr title="Glyph Substitution table">GSUB</abbr> will be correctly
matched later in the shaping process.

#### Stage 2, step 5: Pre-base consonants ####

Fifth, consonants that occur before the syllable base must be tagged,
excluding initial <samp>"Ra,Halant"</samp> sequences that will become
<samp>"Reph"</samp>s:

  - If the consonant has a below-base form, tag it as
          `POS_BELOWBASE_CONSONANT`. 
  - Otherwise, tag it as `POS_PREBASE_CONSONANT`.
  
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.

Devanagari includes one below-base consonant form:

  - <samp>"Halant,Ra"</samp> (occurring after the syllable base) and <samp>"Ra,Halant"</samp>
    (before the syllable base, but in a non-syllable-initial
    position) will take on the <samp>"Rakaar"</samp> form. 
	
> Note: The sequence <samp>"Rra,Halant"</samp> (occurring before the base
> consonant) will take on the <samp>"eyelash Ra"</samp> special form. However, this
> special form is not a below-base form. Instead, it is canonically
> defined as belonging to the half-form substitutions, so it is
> addressed by the `half` feature in stage 3, step 9, and is not
> addressed in this step.

> Note: Because Devanagari employs the `BLWF_MODE_PRE_AND_POST` shaping
> characteristic, consonants with below-base special forms may occur
> before or after the syllable base. 
> 
> During the base-consonant search in 2.1, any instances of the
> <samp>"Halant,_consonant_"</samp>  pattern following the syllable base for these
> below-base forms will be encountered. The tagging in this step
> ensures that the <samp>"_consonant_,Halant"</samp> pattern preceding the syllable
> base for these below-base forms will also be tagged correctly.


#### Stage 2, step 6: Reph ####

Sixth, initial <samp>"Ra,Halant"</samp> sequences that will become <samp>"Reph"</samp>s must be tagged with
`POS_RA_TO_BECOME_REPH`.

> Note: An initial <samp>"Ra,Halant"</samp> sequence will always become a <samp>"Reph"</samp>
> unless the <samp>"Ra"</samp> is the only consonant in the syllable.

#### Stage 2, step 7: Final consonants ####

Seventh, all final consonants must be tagged. Consonants that occur
after the syllable base _and_ after a dependent vowel (matra) sign
must be tagged with  `POS_FINAL_CONSONANT`.

> Note: Final consonants occur only in Sinhala and should not be
> expected in `<dev2>` text runs. This step is included here to
> maintain compatibility across Indic scripts.


#### Stage 2, step 8: Mark tagging ####

Eighth, all marks must be tagged. 

> Note: In this step, joiner and non-joiner characters must also be
> tagged according to the same rules given for marks, even though
> these characters are not categorized as marks in Unicode.

Marks in the `BINDU`, `VISARGA`, `AVAGRAHA`, `CANTILLATION`,
`SYLLABLE_MODIFIER`, `GEMINATION_MARK`, and `SYMBOL` categories should
be tagged with `POS_SMVD`. 

All <samp>"Nukta"</samp>s must be tagged with the same positioning tag as the
preceding consonant, independent vowel, placeholder, or dotted circle.

All remaining marks (not in the `POS_SMVD` category and not <samp>"Nukta"</samp>s)
must be tagged with the same positioning tag as the closest non-mark
character the mark has affinity with, so that they move together 
during the sorting step.

There are two possible cases: those marks before the syllable base
and those marks after the syllable base. In addition, an exception is
made for <samp>"Halant"</samp> marks that follow a left-side (pre-base) matra.

  1. Initially, all remaining marks should be tagged with the same
	 positioning tag as the closest preceding consonant.

  2. For each consonant after the syllable base (such as post-base
	 consonants, below-base consonants, or final consonants), all
	 remaining marks located between that current consonant and any
	 previous consonant should be tagged with the same positioning tag as
	 the current (later) consonant.
  
     In other words, all consonants preceding the syllable base "own" the
	 marks that follow them, while all consonants after the syllable base
	 "own" the marks that come before them. When a syllable does not have
	 any consonants after the syllable base, the syllable base should
	 "own" all the marks that follow it.
  
  3. Finally, <samp>"Halant"</samp> marks that follow a left-side dependent vowel
     (matra) should _not_ be tagged with the left-side matra's
     positioning tag. Instead, the <samp>"Halant"</samp> should be tagged with the
     positioning tag of the non-mark character preceding the left-side
     matra. This prevents the <samp>"Halant"</samp> mark from being moved with the
     left-side matra when the syllable is sorted.


<!--- HarfBuzz also tags everything between a post-base consonant or -->
<!--matra and another post-base consonant as belonging to the latter -->
<!--post-base consonant. --->


#### Stage 2, step 9: Sort syllable ####

With these steps completed, the syllable can be sorted into the final
sort order as listed at the beginning of stage 2.

The glyphs in the syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.


#### Stage 2, step 10: Flag sequences for possible feature applications ####

With the initial reordering complete, those glyphs in the syllable that
may have <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features applied in stages 3, 5, and 6 should be
flagged for each potential feature. 

This flagging is preliminary; the set of potential features varies
between different scripts and which features are supported varies
between fonts. It is also possible that the application of
one feature on a glyph sequence will perform a substitution that makes
a later feature no longer applicable to the updated sequence.

Consequently, the flagging must be completed before shaping proceeds
to the stages during which features are applied.

Some shaping features, such as `locl`, can potentially apply to any
glyphs. Therefore it is not necessary to maintain a separate flag for
these features in the bitmask (or other data structure) used to track
the flags -- although shaping engines may do so if desired.

The sequences to flag are summarized in the list below; a full
description of each feature's function and interpretation is provided
in the <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> application stages that follow.

  - `nukt` should match <samp>"_Consonant_,Nukta"</samp> sequences
  - `akhn` should match <samp>"Ka,Halant,Ssa"</samp> and <samp>"Ja,Halant,Nya"</samp>
  - `rphf` should match initial <samp>"Ra,Halant"</samp> sequences but _not_ match
            initial <samp>"Ra,Halant,ZWJ"</samp> sequences
  - `rkrf` should match <samp>"_Consonant_,Halant,Ra"</samp> sequences
  - `blwf` should match <samp>"Halant,Ra"</samp> in post-base positions and
           <samp>"Ra,Halant"</samp> in non-initial pre-base positions 
  - `half` should match <samp>"_Consonant_,Halant"</samp> in pre-base position but
           _not_ match <samp>"Ra,Halant"</samp> sequences flagged for `rphf` and
           _not_ match <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequences
  - `vatu` should match <samp>"_Consonant_,Halant,Ra"</samp> sequences
  - `cjct` should match <samp>"_Consonant_,Halant,_Consonant_"</samp> but _not_
            match <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
            <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp>


### Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should have been flagged for possible application
of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2, step 10.

The order in which these substitutions must be performed is fixed for
all Indic scripts:

	locl
	nukt
	akhn
	rphf 
	rkrf 
	pref (not used in Devanagari)
	blwf 
	abvf (not used in Devanagari)
	half
	pstf
	vatu
	cjct
	cfar (not used in Devanagari)

#### Stage 3, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

#### Stage 3, step 2: nukt ####

The `nukt` feature replaces <samp>"_Consonant_,Nukta"</samp> sequences with a
precomposed nukta-variant of the consonant glyph. 

  - The context defined for a `nukt` feature is:

:::{table} `nukt` feature context

| Backtrack     | Matching sequence             | Lookahead     |
|:--------------|:------------------------------|:--------------|
| _none_        | `_consonant_`(full),`_nukta_` | _none_        |
:::


:::{figure-md}
![Nukta composition](/images/devanagari/devanagari-nukt.svg "Nukta composition"){.shaping-demo .inline-svg .greyscale-svg #devanagari-nukt}

Nukta composition
:::

```{svg-color-toggle-button} devanagari-nukt
```


#### Stage 3, step 3: akhn ####

The `akhn` feature replaces two specific sequences with required ligatures. 

  - <samp>"Ka,Halant,Ssa"</samp> is substituted with the <samp>"KSsa"</samp> ligature. 
  - <samp>"Ja,Halant,Nya"</samp> is substituted with the <samp>"JNya"</samp> ligature. 
  
These sequences can occur anywhere in a syllable. The <samp>"KSsa"</samp> and
<samp>"JNya"</samp> characters have orthographic status equivalent to full
consonants in some languages, and fonts may have `cjct` substitution
rules designed to match them in subsequences. Therefore, this
feature must be applied before all other many-to-one substitutions.

  - The context defined for an `akhn` feature is:

:::{table} `akhn` feature context
    
| Backtrack     | Matching sequence           | Lookahead     |
|:--------------|:----------------------------|:--------------|
| _none_        | `AKHAND_CONSONANT_SEQUENCE` | _none_        |
:::


:::{figure-md}
![KSsa ligation](/images/devanagari/devanagari-akhn-kssa.svg "KSsa ligation"){.shaping-demo .inline-svg .greyscale-svg #devanagari-akhn-kssa}

KSsa ligation
:::

```{svg-color-toggle-button} devanagari-akhn-kssa
```


:::{figure-md}
![JNya ligation](/images/devanagari/devanagari-akhn-jnya.svg "JNya ligation"){.shaping-demo .inline-svg .greyscale-svg #devanagari-akhn-jnya}

JNya ligation
:::

```{svg-color-toggle-button} devanagari-akhn-jnya
```


#### Stage 3, step 4: rphf ####

The `rphf` feature replaces initial <samp>"Ra,Halant"</samp> sequences with the
<samp>"Reph"</samp> glyph.

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence, however, must not be flagged for
    the `rphf` substitution.
	

  - The context defined for a `rphf` feature is:

:::{table} `rphf` feature context
    
| Backtrack        | Matching sequence       | Lookahead     |
|:-----------------|:------------------------|:--------------|
| `SYLLABLE_START` | "Ra"(full),`_halant_`   | _none_        |
:::


:::{figure-md}
![Reph composition](/images/devanagari/devanagari-rphf.svg "Reph composition"){.shaping-demo .inline-svg .greyscale-svg #devanagari-rphf}

Reph composition
:::

```{svg-color-toggle-button} devanagari-rphf
```

	
#### Stage 3, step 5: rkrf ####

The `rkrf` feature replaces <samp>"_Consonant_,Halant,Ra"</samp> sequences with the
<samp>"Rakaar"</samp>-ligature form of the consonant glyph.


  - The context defined for a `rkrf` feature is:

:::{table} `rkrf` feature context
    
| Backtrack           | Matching sequence     | Lookahead     |
|:--------------------|:----------------------|:--------------|
| `_consonant_`(full) | `_halant_`,"Ra"(full) | _none_        |
:::


:::{figure-md}
![Rakaar composition](/images/devanagari/devanagari-rkrf.svg "Rakaar composition"){.shaping-demo .inline-svg .greyscale-svg #devanagari-rkrf}

Rakaar composition
:::

```{svg-color-toggle-button} devanagari-rkrf
```


#### Stage 3, step 6: pref ####

> This feature is not used in Devanagari.

<!--- 3.5: The `pref` feature replaces pre-base-consonant glyphs with -->
<!--any special forms. --->

#### Stage 3, step 7: blwf ####

The `blwf` feature replaces below-base-consonant glyphs with any
special forms. Devanagari includes one below-base consonant
form:

  - <samp>"Halant,Ra"</samp> (occurring after the syllable base) or <samp>"Ra,Halant"</samp>
    (before the syllable base, but in a non-syllable-initial position) will
    take on the <samp>"Rakaar"</samp> form.
	
If the active font contains ligatures for the consonant adjacent to
the <samp>"Halant"</samp> (i.e., <samp>"_Consonant_,Halant,Ra"</samp>), then that ligature is
normally applied with the `rkrf` feature in step 3.5. The `blwf`
feature allows the <samp>"Ra"</samp> to be substituted with a standalone <samp>"Rakaar"</samp>
mark, to work with all consonants that do not have a `rkrf` ligature
in the font.

Because Devanagari incorporates the `BLWF_MODE_PRE_AND_POST` shaping
characteristic, any pre-base consonants and any post-base consonants
may potentially match a `blwf` substitution; therefore, both cases must
be flagged for comparison. Note that this is not necessarily the case in other
Indic scripts that use a different `BLWF_MODE_` shaping
characteristic. 

:::{figure-md}
![Below-base form](/images/devanagari/devanagari-blwf.svg "Below-base form"){.shaping-demo .inline-svg .greyscale-svg #devanagari-blwf}

Below-base form
:::

```{svg-color-toggle-button} devanagari-blwf
```


#### Stage 3, step 8: abvf ####

> This feature is not used in Devanagari.

#### Stage 3, step 9: half ####

The `half` feature replaces <samp>"_Consonant_,Halant"</samp> sequences before the
base consonant or syllable base with "half forms" of the consonant
glyphs.

In the most common case, this substitution applies to
<samp>"_Consonant_,Halant"</samp> sequences that are followed by another
<samp>"_Consonant_"</samp>.

In addition, a sequence matching <samp>"_Consonant_,Halant,ZWJ"</samp> must also be
flagged for potential `half` substitutions.

> Note: The presence of the <samp>"ZWJ"</samp> at the end of the sequence means
> that the sequence may match the regular-expression test in stage 1
> as the end of a syllable, even without being followed by a base
> consonant or syllable base.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.

There are three exceptions to the default behavior, for which
the shaping engine must test:

  - Initial <samp>"Ra,Halant"</samp> sequences, which should have been flagged for
    the `rphf` feature earlier, must not be flagged for potential
    `half` substitutions.

  - Non-initial <samp>"Ra,Halant"</samp> sequences, which should have been flagged
    for the `rkrf` or `blwf` features earlier, must not be flagged for
    potential `half` substitutions.

  - A sequence matching <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be
    flagged for potential `half` substitutions.

:::{figure-md}
![Half-form formation](/images/devanagari/devanagari-half.svg "Half-form formation"){.shaping-demo .inline-svg .greyscale-svg #devanagari-half}

Half-form formation
:::

```{svg-color-toggle-button} devanagari-half
```


In addition, the sequence <samp>"Rra,Halant"</samp> (occurring before the base
consonant or syllable base) will take on the <samp>"eyelash Ra"</samp> form. Because this
substitution is defined as the canonical half form of <samp>"Rra"</samp> in `<dev2>`, the
shaping engine does not need to implement any special handling to
support it. 

:::{figure-md}
![Eyelash Ra formation](/images/devanagari/devanagari-eyelash-ra.svg "Eyelash Ra formation"){.shaping-demo .inline-svg .greyscale-svg #devanagari-eyelash-ra}

Eyelash Ra formation
:::

```{svg-color-toggle-button} devanagari-eyelash-ra
```


#### Stage 3, step 10: pstf ####

> This feature is not used in Devanagari.


#### Stage 3, step 11: vatu ####

The `vatu` feature replaces certain sequences with "Vattu variant"
forms. 

"Vattu variants" are formed from glyphs followed by <samp>"Rakaar"</samp>
(the below-base form of <samp>"Ra"</samp>); therefore, this feature must be applied after
the `blwf` feature.

:::{figure-md}
![Vattu ligation](/images/devanagari/devanagari-vatu.svg "Vattu ligation"){.shaping-demo .inline-svg .greyscale-svg #devanagari-vatu}

Vattu ligation
:::

```{svg-color-toggle-button} devanagari-vatu
```


#### Stage 3, step 12: cjct ####

The `cjct` feature replaces sequences of adjacent consonants with
conjunct ligatures. These sequences must match <samp>"_Consonant_,Halant,_Consonant_"</samp>.

A sequence matching <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
<samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be flagged to form a conjunct.

> Note: The presence of the <samp>"ZWJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> sequence should automatically
> inhibit any `cjct` feature rules from matching the sequence as valid
> input, and thus prevent the `cjct` substitution from being applied.

> Note: The presence of the <samp>"ZWNJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequence means that the
> <samp>"_Consonant_,Halant,ZWNJ"</samp> subsequence will match the
> regular-expression test in stage 1 as the end of a syllable.
> 
> Because OpenType shaping features in `<dev2>` are defined as
> applying only within an individual syllable, this means that the
> presence of the <samp>"ZWNJ"</samp> will automatically prevent the application of
> a `cjct` feature by triggering the identification of a syllable
> break between the two consonants.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWNJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.
>
> Note, also: The presence of the <samp>"ZWJ"</samp> means that a
> <samp>"_Consonant_,Halant,ZWJ"</samp> sequence may match the regular-expression
> test in stage 1 as the end of a syllable, even without being
> followed by a base consonant or syllable base. By definition,
> however, a <samp>"_Consonant_,Halant,ZWJ"</samp> syllable identified in stage 1
> cannot also include a <samp>"_Consonant_"</samp> after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.

The font's <abbr title="Glyph Substitution table">GSUB</abbr> rules might be implemented so that `cjct`
substitutions apply to half-form consonants; therefore, this feature
must be applied after the `half` feature. 

:::{figure-md}
![Conjunct ligation](/images/devanagari/devanagari-cjct.svg "Conjunct ligation"){.shaping-demo .inline-svg .greyscale-svg #devanagari-cjct}

Conjunct ligation
:::

```{svg-color-toggle-button} devanagari-cjct
```


#### Stage 3, step 13: cfar ####

> This feature is not used in Devanagari.


### Stage 4: Final reordering ###

The final reordering stage repositions marks, dependent-vowel (matra)
signs, and <samp>"Reph"</samp> glyphs to the appropriate location with respect to
the base consonant or syllable base. Because multiple substitutions
may have occurred during the application of the basic-shaping features
in the preceding stage, these repositioning moves could not be
performed during the initial reordering stage.

Like the initial reordering stage, the steps involved in this stage
occur on a per-syllable basis.

<!--- Check that classifications have not been mangled. If the -->
<!--character is a Halant AND a ligature was formed AND a multiple
substitution was performed, restore the classification to VIRAMA
because it was almost certainly lost in the preceding <abbr title="Glyph Substitution table">GSUB</abbr> stage.
--->

#### Stage 4, step 1: Base consonant ####

The final reordering stage, like the initial reordering stage, begins
with determining the syllable base of each syllable, following the
same algorithm used in stage 2, step 1.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base. In a standalone sequence or
other syllable that begins with a placeholder or a dotted circle, the
placeholder or dotted circle will always serve as the syllable base.

In a syllable that begins with a consonant, the shaping engine must
repeat the base-consonant search algorithm used in stage 2, step 1.

The codepoint of the underlying base consonant or syllable base will
not change between the search performed in stage 2, step 1, and the
search repeated here. However, the application of <abbr title="Glyph Substitution table">GSUB</abbr> shaping
features in stage 3 means that several ligation and many-to-one
substitutions may have taken place. The final glyph produced by that
process may, therefore, be a conjunct or ligature form — in most
cases, such a glyph will not have an assigned Unicode codepoint.
   
#### Stage 4, step 2: Pre-base matras ####

Pre-base dependent vowels (matras) that were reordered during the
initial reordering stage must be moved to their final position. This
position is defined as:
   
   - after the last standalone <samp>"Halant"</samp> glyph that comes after the
     matra's starting position and also comes before the main
     consonant.
   - If a zero-width joiner follows this last standalone <samp>"Halant"</samp>, the
     final matra position is moved to after the joiner.

This means that the matra will move to the right of all explicit
<samp>"consonant,Halant"</samp> subsequences, but will stop to the left of the base
consonant or syllable base, all conjuncts or ligatures that contain
the base consonant or syllable base, and all half forms.

:::{figure-md}
![Pre-base matra positioning](/images/devanagari/devanagari-matra-position.svg "Pre-base matra positioning"){.shaping-demo .inline-svg .greyscale-svg #devanagari-matra-position}

Pre-base matra positioning
:::

```{svg-color-toggle-button} devanagari-matra-position
```


> Note: OpenType and Unicode both state that if the syllable includes
> a <abbr title="Zero-Width Joiner">ZWJ</abbr> immediately after the last <samp>"Halant"</samp>, then the final matra
> position should be after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.
>
> However, there are several test sequences indicating that
> Microsoft's Uniscribe shaping engine did not follow this rule (in,
> at least, Devanagari and Bengali text), and in these circumstances
> Uniscribe instead makes the final matra position before the final
> <samp>"Consonant,Halant,ZWJ"</samp>.
>
> Subsequently, the HarfBuzz shaping engine has also followed the same
> pattern. If other shaping engine implementations prefer to maintain
> maximum compatibility with Uniscribe and HarfBuzz, then they should
> also follow suit.

> Note: The Microsoft script-development specifications for OpenType
> shaping also state that if a zero-width non-joiner follows the last
> standalone <samp>"Halant"</samp>, the final matra position is moved to after the
> non-joiner. However, it is unnecessary to test for this condition,
> because a <samp>"Halant,ZWNJ"</samp> subsequence is, by definition, the end of a
> syllable. Consequently, a <samp>"Halant,ZWNJ"</samp> cannot be followed by a
> pre-base dependent vowel.


#### Stage 4, step 3: Reph ####

<samp>"Reph"</samp> must be moved from the beginning of the syllable to its final
position. Because Devanagari incorporates the `REPH_POS_BEFORE_POST`
shaping characteristic, this final position is defined to be
immediately before any independent post-base consonant forms (meaning
the first post-base consonant that has not formed a ligature with the
syllable base).

The algorithm for finding the final <samp>"Reph"</samp> position is:

<!---

  - Find the first explicit <samp>"Halant"</samp> between the first post-Reph
    consonant and the last main consonant. Move the <samp>"Reph"</samp> to the
    position immediately after this <samp>"Halant"</samp>.
	- If a zero-width joiner (<abbr title="Zero-Width Joiner">ZWJ</abbr>) or a zero-width non-joiner (<abbr title="Zero-Width Non Joiner">ZWNJ</abbr>)
      follows this <samp>"Halant"</samp>, move the <samp>"Reph"</samp> to the position
      immediately after the <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.
--->

  - Starting at the first post-<samp>"Reph"</samp> consonant, search forward looking
    for the first explicit <samp>"Halant"</samp>, ending the search when the base
    consonant is encountered. If such an explicit <samp>"Halant"</samp> is found,
    move the <samp>"Reph"</samp> to the position immediately after this
    <samp>"Halant"</samp>.
	  * If a zero-width joiner (<abbr>ZWJ</abbr>) or a zero-width non-joiner (<abbr>ZWNJ</abbr>)
        follows this <samp>"Halant"</samp>, move the <samp>"Reph"</samp> to the position
        immediately after the <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>. This will be the final
        <samp>"Reph"</samp> position. 
	  * If no <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> follows this <samp>"Halant"</samp>, leave the <samp>"Reph"</samp> in
        its position immediately after the <samp>"Halant"</samp>. This will be the
        final <samp>"Reph"</samp> position. 
  - If no such explicit <samp>"Halant"</samp> is found in the previous step, find
    the first post-base consonant that has not formed a ligature with
    the base consonant. If such a non-ligated post-base consonant is
    found, move the <samp>"Reph"</samp> to the position immediately before the
    non-ligated post-base consonant. This will be the final <samp>"Reph"</samp>
    position.
  - If no such non-ligated post-base consonant is found in the
    previous step, move the <samp>"Reph"</samp> to the position immediately before
    the first post-base matra, syllable modifier, or Vedic sign that
    has a positioning tag after the script's <samp>"Reph"</samp> position in the
    syllable sort order (as listed in [stage
    2](#stage-2-initial-reordering)). This will be the final <samp>"Reph"</samp>
    position. 
	> Note: Because Devanagari incorporates the
    > `REPH_POS_BEFORE_POST` shaping characteristic, this means
    > any positioning tag of `POS_POSTBASE_CONSONANT` or later,
    > although a post-base matra, syllable modifier, or Vedic sign
    > would not typically be tagged with `POS_POSTBASE_CONSONANT`.
  - If no other location has been located in the previous steps, move
    the <samp>"Reph"</samp> to the end of the syllable.


Finally, if the final position of <samp>"Reph"</samp> occurs after a
<samp>"_matra_,Halant"</samp> subsequence, then <samp>"Reph"</samp> must be repositioned to the
left of <samp>"Halant"</samp>, to allow for potential matching with `abvs` or
`psts` substitutions from <abbr title="Glyph Substitution table">GSUB</abbr>.


:::{figure-md}
![Reph positioning](/images/devanagari/devanagari-reph-position.svg "Reph positioning"){.shaping-demo .inline-svg .greyscale-svg #devanagari-reph-position}

Reph positioning
:::

```{svg-color-toggle-button} devanagari-reph-position
```


#### Stage 4, step 4: Pre-base-reordering consonants ####

Any pre-base-reordering consonants must be moved to immediately before
the base consonant or syllable base.
  
  
#### Stage 4, step 5: Initial matras ####

Any left-side dependent vowels (matras) that are at the start of a
word must be flagged for potential substitution by the `init` feature
of <abbr title="Glyph Substitution table">GSUB</abbr>.

Devanagari does not use the `init` feature, so this step will
involve no work when processing `<dev2>` text. It is included here in
order to maintain compatibility with the other Indic scripts.


### Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr> ###

In this stage, the remaining substitution features from the <abbr title="Glyph Substitution table">GSUB</abbr> table
are applied. In preparation for this stage, glyph sequences should
already have been flagged, in stage 2, step 10, for possible
application of these <abbr title="Glyph Substitution table">GSUB</abbr> features.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	init (not used in Devanagari)
	pres
	abvs
	blws
	psts
	haln

The `init` feature is not used in Devanagari.

The `pres` feature replaces pre-base-consonant glyphs with special
presentation forms. This can include consonant conjuncts, half-form
consonants, and stylistic variants of left-side dependent vowels
(matras). 

:::{figure-md}
![Pre-base substitution](/images/devanagari/devanagari-pres.svg "Pre-base substitution"){.shaping-demo .inline-svg .greyscale-svg #devanagari-pres}

Pre-base substitution
:::

```{svg-color-toggle-button} devanagari-pres
```


The `abvs` feature replaces above-base-consonant glyphs with special
presentation forms. This usually includes contextual variants of
above-base marks or contextually appropriate mark-and-base ligatures.

:::{figure-md}
![Above-base substitution](/images/devanagari/devanagari-abvs.svg "Above-base substitution"){.shaping-demo .inline-svg .greyscale-svg #devanagari-abvs}

Above-base substitution
:::

```{svg-color-toggle-button} devanagari-abvs
```


The `blws` feature replaces below-base-consonant glyphs with special
presentation forms. This usually includes replacing base consonants or syllable bases that
are adjacent to the below-base-consonant form <samp>"Rakaar"</samp> with contextual
ligatures.

:::{figure-md}
![Below-base substitution](/images/devanagari/devanagari-blws.svg "Below-base substitution"){.shaping-demo .inline-svg .greyscale-svg #devanagari-blws}

Below-base substitution
:::

```{svg-color-toggle-button} devanagari-blws
```


The `psts` feature replaces post-base-consonant glyphs with special
presentation forms. This usually includes replacing right-side
dependent vowels (matras) with stylistic variants or replacing
post-base-consonant/matra pairs with contextual ligatures. 

:::{figure-md}
![Post-base substitution](/images/devanagari/devanagari-psts.svg "Post-base substitution"){.shaping-demo .inline-svg .greyscale-svg #devanagari-psts}

Post-base substitution
:::

```{svg-color-toggle-button} devanagari-psts
```


The `haln` feature replaces syllable-final <samp>"_Consonant_,Halant"</samp> pairs with
special presentation forms. This can include stylistic variants of the
consonant where placing the <samp>"Halant"</samp> mark on its own is
typographically problematic. 

:::{figure-md}
![Halant substitution](/images/devanagari/devanagari-haln.svg "Halant substitution"){.shaping-demo .inline-svg .greyscale-svg #devanagari-haln}

Halant substitution
:::

```{svg-color-toggle-button} devanagari-haln
```


> Note: The `calt` feature, which allows for generalized application
> of contextual alternate substitutions, is usually applied at this
> point. However, `calt` is not mandatory for correct Devanagari shaping
> and may be disabled in the application by user preference.

### Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied.

As with the preceding stage, the order in which these features are
applied is not canonical; they should be applied in the order in which
they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

        dist
        abvm
        blwm

> Note: The `kern` feature is usually applied at this stage, if it is
> present in the font. However, `kern` (like `calt`, above) is not
> mandatory for shaping Devanagari text and may be disabled by user preference.

The `dist` feature adjusts the horizontal positioning of
glyphs. Unlike `kern`, adjustments made with `dist` do not require the
application or the user to enable any software _kerning_ features, if
such features are optional. 

The `abvm` feature positions above-base marks for attachment to base
characters. In Devanagari, this includes <samp>"Reph"</samp> in addition to
above-base dependent vowels (matras), diacritical marks, and Vedic signs. 

:::{figure-md}
![Above-base mark positioning](/images/devanagari/devanagari-abvm.svg "Above-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #devanagari-abvm}

Above-base mark positioning
:::

```{svg-color-toggle-button} devanagari-abvm
```


The `blwm` feature positions below-base marks for attachment to base
characters. In Devanagari, this includes below-base dependent vowels
(matras) and diacritical marks as well as the below-base consonant form <samp>"Rakaar"</samp>.

:::{figure-md}
![Below-base mark positioning](/images/devanagari/devanagari-blwm.svg "Below-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #devanagari-blwm}

Below-base mark positioning
:::

```{svg-color-toggle-button} devanagari-blwm
```


## The `<deva>` shaping model ##

The older Devanagari script tag, `<deva>`, has been deprecated. However,
shaping engines may still encounter fonts that were built to work with
`<deva>` and some users may still have documents that were written to
take advantage of `<deva>` shaping.

### Distinctions from `<dev2>` ###

The most significant distinction between the shaping models is that the
sequence of <samp>"Halant"</samp> and consonant glyphs used to trigger shaping
features was altered when migrating from `<deva>` to
`<dev2>`. 


Specifically, shaping engines were expected to reorder post-base
<samp>"Halant,_Consonant_"</samp> sequences to <samp>"_Consonant_,Halant"</samp>.

As a result, a font's <abbr title="Glyph Substitution table">GSUB</abbr> substitutions would be written to match
<samp>"_Consonant_,Halant"</samp> sequences in all pre-base and post-base positions.


The `<deva>` syllable

	Pre-baseC Halant BaseC Halant Post-baseC

would be reordered to

	Pre-baseC Halant BaseC Post-baseC Halant

before features are applied.

In `<dev2>` text, as described above in this document, there is no
such reordering. The correct sequence to match for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions is
<samp>"_Consonant_,Halant"</samp> for pre-base consonants, but <samp>"Halant,_Consonant_"</samp>
for post-base consonants.

The old Indic shaping model also did not recognize the
`BLWF_MODE_PRE_AND_POST` shaping characteristic. Instead, `<deva>`
was treated as if it followed the `BLWF_MODE_POST_ONLY`
characteristic — with a single exception made for non-syllable-initial
<samp>"Ra,Halant"</samp>.

In other words, a non-syllable-initial <samp>"Ra,Halant"</samp> sequence would
trigger a below-base form substitution, but all other below-base form
substitutions were applied only to consonants after the base
consonant or syllable base.

In addition, for some scripts, left-side dependent vowel marks
(matras) were not repositioned during the final reordering
stage. For `<deva>` text, the left-side matra was always positioned
at the beginning of the syllable.

Finally, in `<deva>` text, the <samp>"eyelash Ra"</samp> form was encoded as the
sequence <samp>"Ra,Halant,ZWJ"</samp>. 

In `<dev2>`, the required encoding for <samp>"eyelash Ra"</samp> is now
<samp>"Rra,Halant"</samp>, and the substitution is implemented using the `half`
feature of <abbr title="Glyph Substitution table">GSUB</abbr>.


### Advice for handling fonts with `<deva>` features only ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences in order to apply <abbr title="Glyph Substitution table">GSUB</abbr> substitutions when it is known that
the font in use supports only the `<deva>` shaping model.

### Advice for handling text runs composed in `<deva>` format ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions or to reorder them to
<samp>"Halant,_Consonant_"</samp> when processing text runs that are tagged with
the `<deva>` script tag and it is known that the font in use supports
only the `<dev2>` shaping model.

Shaping engines may also choose to apply `blwf` substitutions to
below-base consonants occurring before the base consonant or syllable base when it is
known that the font in use supports an applicable substitution lookup.

Shaping engines may also choose to position left-side matras according
to the `<deva>` ordering scheme; however, doing so might interfere
with matching <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features.


================================================
FILE: opentype-shaping-emoji.md
================================================
# Emoji shaping in OpenType #

This document details the default shaping procedure needed to shape
emoji sequences.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Normalization](#normalization)
  - [Bidirectionality](#bidirectionality)
  - [Sequence identification](#sequence-identification)
    - [Presentation sequences](#presentation-sequences)
    - [Modifier sequences](#modifier-sequences)
    - [Regional Indicator flag sequences](#regional-indicator-flag-sequences)
    - [Tag flag sequences](#tag-flag-sequences)
    - [Keycap sequences](#keycap-sequences)
    - [Zero-Width Joiner sequences](#zwj-sequences)
      - [<abbr>ZWJ</abbr> hair sequences](#zwj-hair-sequences)
      - [<abbr>ZWJ</abbr> gendered person sequences](#zwj-gendered-person-sequences)
      - [<abbr>ZWJ</abbr> multi-person group sequences](#zwj-multi-person-group-sequences)
      - [<abbr>ZWJ</abbr> role sequences](#zwj-role-sequences)
      - [<abbr>ZWJ</abbr> color sequences](#zwj-color-sequences)
      - [<abbr>ZWJ</abbr> directionality sequences](#zwj-directionality-sequences)
      - [<abbr>ZWJ</abbr> additional sequences](#zwj-additional-sequences)
    - [Other sequences and ligatures](#other-sequences-and-ligatures)
  - [Feature interaction in sequences](#feature-interaction-in-sequences)
  - [Emoji sets](#emoji-sets)
  - [The default shaping model](#the-default-shaping-model)
  
  
## General information ##

The emoji OpenType shaping model is used for correctly displaying
sequences from the Emoji block in Unicode as well as for numerous
emoji codepoints found in other blocks.

Emoji codepoints originated from a variety of pre-Unicode sources,
including standards used by mobile-phone carriers in Japan,
typographic character sets such as Zapf Dingbats and Wingdings, and
various symbols in common usage.

Emoji shaping follows the default OpenType shaping model used for
scripts that are considered _non-complex_ from the shaper's
perspective. However, emoji fonts typically use <abbr title="Glyph Substitution table">GSUB</abbr> tables to
implement a variety of OpenType smart features, including several
classes of ligature, contextual alternates, or variant forms to
support emoji sequences.

In addition to standalone image glyphs, emoji shaping is also used to
display flag sequences and "keycap" sequences, both of which involve a
combination of emoji and non-emoji codepoints in order.

The default emoji glyph for a given codepoint may be substituted by
the addition of selectors, modifiers, or joiners after the emoji
codepoint.

Many of these emoji sequences carry important semantic meaning,
such as specifying gender, skin tone, object colors, and
directions. Shaping engines should therefore make a best effort to
correctly identify and display these sequences.

Fallback presentation is possible for some emoji sequences by
displaying the sequence of default emoji glyphs for the
codepoints. For other emoji sequences, however, the most appropriate
fallback approach is less clearly defined and may vary between
implementations.

Emoji glyphs may be stored in any of several color formats, or in any
of the monochrome Bézier formats typically used for standard text
codepoints. Correctly retrieving and displaying the glyph data for
the format used by the active font is outside the scope of this
document.

> Note: "shortcut codes" for emoji like `:smile:` are text mark-up
> and are _not_ handled by OpenType shaping. The set of shortcut codes
> supported by any particular application is specific to that
> application alone.
>
> Text-processing stacks typically support a set of shortcut codes
> that includes Unicode's official `Short_Name` property from the <abbr title="Common Locale Data Repository">CLDR</abbr>
> database, plus additional short codes, but the shortcut-code mapping
> is not otherwise linked to Unicode data.

Runs of emoji might be tagged with the `<Zsye>` or `<Zsym>` script
subtags, or with the `-em-emoji`, `-em-text`, or `-em-default` locale
extensions. However, these subtags and extensions are primarily
intended to control which presentation form is preferred by the
application, and must not be relied on for the purpose of identifying
emoji.


## Terminology ##

A codepoint is considered an **emoji** only if it has the `Emoji`
property in the Unicode Character Database (<abbr>UCD</abbr>). Although many
codepoints that have this property are pictographic in nature, some
codepoints that are pictographic do not have the `Emoji` property
(such as most chess, playing-card, and game-piece symbols), and some
codepoints that do have the `Emoji` property show typographic
characters rather than pictographic images.

All emoji codepoints — as well as several non-emoji codepoints — have
the `Extended_Pictographic` property. When a non-emoji codepoint has
the `Extended_Pictographic` property, this indicates that future
revisions of Unicode may incorporate the codepoint in a valid emoji
sequence, or may (for a currently-unassigned codepoint) assign an
emoji character to the codepoint.

The emoji codepoints also include two distinct sets of alphanumeric
character codepoints that are used to implement specific substitution
sequences.

The **regional indicator** set includes 26 codepoints that correspond
to the upper-case Basic Latin letters (<samp>"A"</samp> to <samp>"Z"</samp>), which are used to support the
predefined set of regional flags. The regional indicator set is found
within the Enclosed Alphanumeric Supplement block of Unicode.

The **tag character** set includes codepoints that correspond to the
printable characters in the ASCII set, as well as an <samp>"End"</samp> control
tag. The tag characters are used to support a more general mechanism
for local and sub-national flags that are not covered by the
predefined regional-indicator flag set. The tag characters set is
found within the Tags block of Unicode.

**Presentation style** describes whether an emoji codepoint is
shown in emoji style (for example, with a full-color bitmap or <abbr title="Scalable Vector Graphics">SVG</abbr>
glyph) or text style (such as a monochrome, Bézier glyph). Every emoji
codepoint defaults to either emoji-style or text-style
presentation.

An emoji codepoint might be followed by a **presentation selector**.
This selector requests that either emoji-style or text-style be used
for the preceding emoji codepoint, potentially overriding that
codepoint's default. There are two presentation selectors:

  - `Variation Selector 15` (VS15, `U+FE0E`) requests text
    presentation style.
  - `Variation Selector 16` (VS16, `U+FE0F`) requests emoji
    presentation style.

:::{figure-md}
![Text presentation-style selector](/images/emoji/text-presentation.png "Text presentation-style selector")

Text presentation-style selector
:::


:::{figure-md}
![Emoji presentation-style selector](/images/emoji/emoji-presentation.png "Emoji presentation-style selector")

Emoji presentation-style selector
:::


An emoji codepoint might also be followed by an emoji
**modifier**. This modifier requests an alternate version of the emoji
glyph. Currently, there are five emoji modifiers defined, all of which
are assigned to a skin-tone designation from the Fitzpatrick scale:

  - `U+1F3FB` "Light skin tone"
  - `U+1F3FC` "Medium-light skin tone"
  - `U+1F3FD` "Medium skin tone"
  - `U+1F3FE` "Medium-dark skin tone"
  - `U+1F3FF` "Dark skin tone"


Emoji **sequences** consist of one or more emoji codepoints,
optionally followed by presentation selectors, modifiers, or other
special characters. A font can implement custom ligatures for any
sequence of emoji. However, Unicode also designates specific sequences
that should be supported. These sequences can involve three special
non-printing codepoints in addition to the selectors and modifiers
mentioned above:

  - The Combining Enclosing Keycap (<abbr>CEK</abbr>, `U+20E3`) is used to form
    **keycap** sequences corresponding to telephone keypad keys.

  - The Cancel Tag (`U+E007F`) is used to form tag-based flag
    sequences.

  - The Zero-Width Joiner (<abbr>ZWJ</abbr>, `U+200D`) is used to form emoji
    sequences for multi-person groups, gendered forms, hair-color
    variants, and directionality.


## Normalization ##

Emoji sequences are not generally affected by Unicode or OpenType
normalization. However, Unicode does specify an order to be used when
representing <abbr title="Zero-Width Joiner">ZWJ</abbr>-using emoji sequences.

The correct order should be:

    Base emoji codepoint
	Emoji modifier OR Emoji presentation selector
	Hair subsequence
	Color subsequence
	Gender-sign or object subsequence
	Directionality indicator


Although this ordering is not designated a Unicode normalization form,
shaping engine implementers may find it a useful target if attempting
to correct invalid mis-ordered emoji <abbr title="Zero-Width Joiner">ZWJ</abbr> sequences.

Shaping engines should also note that the `Emoji` and
`Extended_Pictographic` properties may require tracking in any Unicode
normalization routines.

The `Emoji` property of a codepoint can be unintentionally lost when
certain string transformations are performed. For example, the
upper-case versions of the Circled Latin Letters have the `Emoji`
property, but the lower-case versions of the Circled Latin Letters do
not. Therefore, a case-transformation rule must take care not to
unintentionally break the desired output by losing the property.

The `Extended_Pictographic` property of a codepoint should be tracked
because it is set on several non-emoji codepoints that may be updated
to have the `Emoji` property in a future release of Unicode.


## Bidirectionality ##

Most emoji sequences are defined to be neutral for the purpose of
bidirectionality segmenting and handling.

However, the Regional Indicator flag sequences are defined to be
left-to-right only, overriding any levels of bidirectional embedding.


## Sequence identification ##

There are six varieties of emoji sequence defined by Unicode:

1. Presentation sequences
2. Modifier sequences
3. Regional Indicator flag sequences
4. Tag flag sequences
5. Keycap sequences
6. Zero-width joiner (<abbr>ZWJ</abbr>) sequences

> Note: The <abbr title="Zero-Width Joiner">ZWJ</abbr> sequence variety incorporates several subsets, but all
> of the <abbr title="Zero-Width Joiner">ZWJ</abbr> sequences are implemented using the same mechanism.

The set of sequences includes various mechanisms defined at different
times by either Unicode itself or by legacy encoding standards. In
some cases, an older mechanism (such as the Regional Indicator
mechanism used for national flags) has been superseded by a newer,
more flexible mechanism intended to permit emoji vendors to provide
support for a large set of new representations or emoji variants
without requiring Unicode to define new codepoints for every possible
permutation. Nevertheless, shaping-engine implementers should expect
to encounter any or all of the defined sequences.

This set includes the major categories of sequences that shaping
engines are likely to encounter and that can convey important
contextual information to users. Note, however, that fonts may
implement additional sequences via ligature substitution or other
existing mechanisms.

Each of the six sequence varieties can also be interpreted as a
different module of overall "emoji sequence support" for a
shaping-engine implementation. For example, support for Regional
Indicator flag sequences is distinct from support for Keycap
sequences. For convenience, in this document, the sequence varieties
are listed in an order that roughly approximates their complexity, but
this ordering is not definitive.

Sequences should be identified by examining the run and matching
characters, based on their categorization, using regular expressions. 

The following general-purpose identification classes can be used to
match emoji sequences in regular expressions.

```markdown
_emoji_             = `EMOJI`
_modifier_          = "U+1F3FB" | "U+1F3FC" | "U+1F3FD" | "U+1F3FE" | "U+1F3FF"
_presentation_      = `VS15` | `VS16`
_zwj_               = `ZWJ`
_cek_               = `CEK`
_blackflag_         = "U+1F3F4"
_key_               = "#" | "*" | ["0".."9"]
_color_             = "U+2B1B" | "U+2B1C" | "U+1F7E5" | "U+1F7E6" | "U+1F7E7" | "U+1F7E8" | "U+1F7E9" | "U+1F7EA" | "U+1F7EB"
_multipersongroup_ = "U+1F91D" | "U+1F46F" | "U+1F93C" | "U+1F46B" | "U+1F46C" | "U+1F46D" | "U+1F48F" | "U+1F491" | "U+1F46A"
_gendersign_        = "U+2640" | "U+2642"
_genderperson_      = "U+1F468" | "U+1F469" | "U+1F9D1"
_hairstyle_         = "U+1F9B0" | "U+1F9B1" | "U+1F9B2" | "U+1F9B3"
_direction_         = "U+2B05" | "U+27A1"
_regionalindicator_ = ["U+1F1E6".."U+1F1FF"]
_tagchar_           = `TAG_CHARACTER`
_endtag_            = "U+E007F"
```
<!---

_adult_               = `U+1F468` | `U+1F469`
_child_               = `U+1F466` | `U+1F467`
_familymember_              = _adult_ | _child_

_family_    = _adult_{1,2} _child_{0,2}

 - this doesn't work; perhaps need to redefine the above as "genderperson" and 
   the OLD genderperson as something different. Coherent naming is going 
   to be a challenge on that front.
--->

The expressions below use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


### Presentation sequences ###

A presentation sequence is used to request a specific presentation
style ("text" or "emoji"), potentially overriding the default
presentation style defined for the codepoint by Unicode.

:::{figure-md}
![Requesting emoji presentation style](/images/emoji/emoji-pres-sequence.png "Requesting emoji presentation style")

Requesting emoji presentation style
:::


:::{figure-md}
![Requesting text presentation style](/images/emoji/text-pres-sequence.png "Requesting text presentation style")

Requesting text presentation style
:::


The active emoji font, however, might not contain a glyph for the
presentation style requested in the sequence. In particular, it is not
common for emoji fonts to include text-presentation glyphs for
codepoints that default to the emoji-presentation style.

In these instances, the text rendering stack should select a fallback
font that does contain the glyph requested by the presentation
sequence. Strategies for identifying appropriate fallback fonts are
beyond the scope of this document.

A standalone presentation sequence must match:

```markdown
_emoji_ _presentation_
```

Although standalone presentation sequences can occur, note that
presentation sequences also occur within longer emoji sequences.


### Modifier sequences ###

A modifier sequence is used to request an alternate glyph for an emoji
codepoint. 

Currently, there are five emoji-modifier codepoints defined by
Unicode. Each corresponds to a different human skin-tone based on the
Fitzpatrick scale.

:::{figure-md}
![Fitzpatrick 2](/images/emoji/fitzpatrick-2.png "Fitzpatrick 2")

Modifier for Fitzpatrick scale skin-tone 2
:::

:::{figure-md}
![Fitzpatrick 3](/images/emoji/fitzpatrick-3.png "Fitzpatrick 3")

Modifier for Fitzpatrick scale skin-tone 3
:::

:::{figure-md}
![Fitzpatrick 4](/images/emoji/fitzpatrick-4.png "Fitzpatrick 4")

Modifier for Fitzpatrick scale skin-tone 4
:::

:::{figure-md}
![Fitzpatrick 5](/images/emoji/fitzpatrick-5.png "Fitzpatrick 5")

Modifier for Fitzpatrick scale skin-tone 5
:::

:::{figure-md}
![Fitzpatrick 6](/images/emoji/fitzpatrick-6.png "Fitzpatrick 6")

Modifier for Fitzpatrick scale skin-tone 6
:::


A modifier sequence must match:

```markdown
_emojimodified_ = _emoji_ _modifier_
```

Fonts are expected to implement modifier sequences for emoji
codepoints that depict a single human being, and are expected not to
implement modifier sequences for other emoji codepoints.

> Note: Most emoji sequences that depict multiple human beings are
> modified using the <abbr title="Zero-Width Joiner">ZWJ</abbr> mechanisms described later, and not via this
> mechanism.
> 
> However, there are a small number of codepoints that depict groups
> of human beings in a standalone codepoint and can be modified with a
> single modifier. They are summarized in the table at the [feature
> interaction in sequences](#feature-interaction-in-sequences)
> section.
>
> Note also that there are emoji codepoints depicting beings that are
> ambiguous in regard to their humanity, such as `U+1F9DB`,
> "Vampire". Shaping engines should not assume that these codepoints
> are unable to support a modifier.

:::{figure-md}
![Modifier sequence](/images/emoji/modifier-sequence.png "Modifier sequence")

Modifier sequence
:::


The fallback for a modifier sequence is the generic, unmodified
emoji followed by an emoji representing the skin-tone requested.

:::{figure-md}
![Modifier sequence fallback](/images/emoji/modifier-sequence-fallback.png "Modifier sequence fallback")

Modifier sequence fallback
:::


Modifier sequences use emoji presentation style by default, and cannot
include a presentation selector. However, an implementation may choose
to display text-presentation versions of sequences if emoji
presentation style is not possible in the environment.

Although standalone modifier sequences occur, note that modifier
sequences can also occur within longer emoji sequences.


### Regional Indicator flag sequences ###

A Regional Indicator flag sequence is used to request a flag
emoji. All Regional Indicator flag sequences are two codepoints long,
using codepoints from the `REGIONAL_INDICATOR` alphabetical set.

A Regional Indicator flag sequence must match:

```markdown
_regionalindicator_ _regionalindicator_
```

In addition, the only two-codepoint sequences that are considered
valid Regional Indicator flag sequences are those that correspond to
the `unicode_region_subtag` field in the <abbr title="Common Locale Data Repository">CLDR</abbr> database.

The typical emoji implementation of such a sequence is an image of a
flag for the region. However, emoji fonts may choose to represent the
region through some other visual means (for example, a regional symbol
or map image). Similarly, where there is more than one possible flag
for a region, Unicode does not specify any particular visual
representation.

Some historical region subtags have been designated as deprecated (for
example, <samp>"East Germany"</samp> and <samp>"West Germany"</samp>). Emoji fonts are not
expected to support these deprecated subtags. However, if they are
encountered in a text run and are supported in the active font,
shaping engines should deal with the situation gracefully, without
offering guarantees of support.

:::{figure-md}
![Regional Indicator flag sequence](/images/emoji/regional-indicator-flag-sequence-un.png "Regional Indicator flag sequence")

Regional Indicator flag sequence
:::


Regional Indicator flag sequences use emoji presentation by default,
and cannot include a presentation selector.  However, an
implementation may choose to display text-presentation versions of
sequences if emoji presentation style is not possible in the
environment.

:::{figure-md}
![Regional Indicator flag sequence fallback](/images/emoji/regional-indicator-flag-sequence-un-fallback.png "Regional Indicator flag sequence fallback")

Regional Indicator flag sequence fallback
:::


Regional Indicator flag sequences only occur in standalone form.

> Note: The Regional Indicator flag sequences are defined to always be
> interpreted left-to-right (<abbr>LTR</abbr>) for the purpose of
> bidirectionality. This behavior differs from that of other emoji
> sequences, which are neutral in regard to bidirectionality.
>
> For example, a Regional Indicator sequence <samp>"RI_U, RI_A"</samp> should result
> in a flag for Ukraine (<samp>"UA"</samp>), even if it occurs within a run of
> right-to-left text. Reversing the sequence to result in a flag for
> Australia (<samp>"AU"</samp>) is incorrect.


### Tag flag sequences ###

A Tag flag sequence is used to request a flag emoji for any flag not
defined by the Regional Indicator flag sequence mechanism.

A Tag flag sequence must match:

```markdown
_blackflag_ _tagchar_+ _endtag_
```

The codepoints in the `TAG_CHARACTER` set come from the "Tags" block
in Unicode. At present, the set of allowable tags is defined as the
range `[U+E0020..U+E007E]`, which includes tags for space, upper- and
lower-case basic Latin alphabetic letters, numerals, and several
symbols. However, Unicode also notes that the upper-case alphabetic
tags are not currently used.

Tag sequences must end with <samp>"Cancel Tag"</samp> (`U+E007F`).

> Note: Despite the official name "Cancel Tag", this codepoint
> terminates valid tag sequences, rather than negating them.

:::{figure-md}
![Tag flag sequence](/images/emoji/tag-flag-sequence-wales.png "Tag flag sequence")

Tag flag sequence
:::


Tag flag sequences only occur in standalone form.


### Keycap sequences ###

A Keycap sequence is used to request an emoji that depicts a
telephone-keypad button.

A Keycap sequence must match:

```markdown
_key_ _presentation_ _cek_
```

:::{figure-md}
![Keycap sequence](/images/emoji/keycap-sequence.png "Keycap sequence")

Keycap sequence
:::


Keycap sequences only occur in standalone form.


### <abbr>ZWJ</abbr> sequences ###

A Zero-Width Joiner (<abbr>ZWJ</abbr>) sequence can be used to request specific
variants of an emoji glyph or to request the combined form of a
sequence of emoji glyphs.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> codepoint itself is invisible, users will expect <abbr title="Zero-Width Joiner">ZWJ</abbr>
sequences to fall back gracefully as sequences of standalone emoji
glyphs that convey the original meaning. For example, a <abbr title="Zero-Width Joiner">ZWJ</abbr>
multi-person group sequence would be rendered as a single multi-person
emoji glyph if one is available in the active font, but would fall
back to a set of individual-person emoji glyphs.


#### <abbr>ZWJ</abbr> hair sequences ####

A <abbr title="Zero-Width Joiner">ZWJ</abbr> hair sequence is used to request a specific hairstyle version of
an emoji codepoint that depicts a single human being.

:::{figure-md}
![ZWJ hairstyle sequence](/images/emoji/hairstyle-sequence.png "ZWJ hairstyle sequence")

ZWJ hairstyle sequence
:::


A <abbr title="Zero-Width Joiner">ZWJ</abbr> hair sequence must match:

```markdown
(_emoji_ | _emojimodified_) _zwj_ _hairstyle_
```

Currently, four hairstyle modifier codepoints are defined:

  - `U+1F9B0` "Red or ginger hair"
  - `U+1F9B1` "Curly hair"
  - `U+1F9B2` "Bald"
  - `U+1F9B3` "White hair"

The set of hairstyle sequences allowed has been chosen to enable
depictions of distinct properties not easily represented by the
defaults of the fallback glyphs. By default, the hairstyle and color
on fallback emoji is expected to be nondescript and dark.

Prior to the adoption of the <abbr title="Zero-Width Joiner">ZWJ</abbr>-hair-sequence mechanism, a codepoint
specifying "person with blond hair" (`U+1F471`) already existed;
therefore "blond" was not included in the set of supported hairstyle
versions.


#### <abbr>ZWJ</abbr> gendered person sequences ####

A <abbr title="Zero-Width Joiner">ZWJ</abbr> gendered person sequence is used to request a specific-gendered
version of an emoji codepoint that depicts a single human being.

Each <abbr title="Zero-Width Joiner">ZWJ</abbr> gendered person sequence is composed of an emoji that depicts
a human by default, followed by <samp>"ZWJ"</samp>, followed by a gender symbol,
followed by <samp>"VS16"</samp>.

:::{figure-md}
![ZWJ gendered person sequence](/images/emoji/gendered-person-sequence.png "ZWJ gendered person sequence")

ZWJ gendered person sequence
:::


The fallback for a <abbr title="Zero-Width Joiner">ZWJ</abbr> gendered person sequence is a generic "person"
emoji followed by a gender symbol.

:::{figure-md}
![ZWJ gendered person sequence fallback](/images/emoji/gendered-person-sequence-fallback.png "ZWJ gendered person sequence fallback")

ZWJ gendered person sequence fallback
:::


A <abbr title="Zero-Width Joiner">ZWJ</abbr> gendered person sequence must match:

```markdown
(_emoji_ | _emojimodified_) _zwj_ _gendersign_ _VS16_
```

A small number of emoji codepoints are defined to show a single human
being with a fixed gender. These codepoints cannot have their apparent
gender modified using the <abbr title="Zero-Width Joiner">ZWJ</abbr> gendered person mechanism.

Currently, this list of codepoints includes those in the table below:

:::{table} Single-human emoji codepoints that do not support gendered-person modifiers

| Emoji codepoint                | Gender |
|:-------------------------------|:-------|
| `U+1F467` Girl                 | Female |
| `U+1F466` Boy                  | Male   |
| `U+1F469` Woman                | Female |
| `U+1F468` Man                  | Male   |
| `U+1F475` Older woman          | Female |
| `U+1F474` Older man            | Male   |
| `U+1F936` Mrs Claus            | Female |
| `U+1F385` Santa Claus          | Male   |
| `U+1F478` Princess             | Female |
| `U+1F934` Prince               | Male   |
| `U+1F483` Woman dancing        | Female |
| `U+1F57A` Man dancing          | Male   |
| `U+1F930` Pregnant woman       | Female |
| `U+1F931` Breastfeeding        | Female |
| `U+1F9D5` Woman with headscarf | Female |
:::


However, the list may be updated in subsequent revisions of Unicode.

In addition, emoji codepoints that depict groups of two or more human
beings are handled by other mechanisms, such as the <abbr title="Zero-Width Joiner">ZWJ</abbr> multi-person
group mechanism, and are documented in the corresponding section.

> Note: The <abbr title="Zero-Width Joiner">ZWJ</abbr> gendered person sequence is not to be confused with
> the <abbr title="Zero-Width Joiner">ZWJ</abbr> role sequence.
>
> In effect, both sequence types can be used to depict a human being
> performing a task or activity, and can be used to request a specific
> gender for the human being depicted.
>
> However, all of the codepoints covered by the <abbr title="Zero-Width Joiner">ZWJ</abbr> gendered person
> sequences are emoji that show a human being by default, whereas the
> codepoints covered by the <abbr title="Zero-Width Joiner">ZWJ</abbr> role sequences begin with a generic
> human-being emoji and append a symbol or object emoji.


#### <abbr>ZWJ</abbr> multi-person group sequences ####

A <abbr title="Zero-Width Joiner">ZWJ</abbr> multi-person group sequence is used to request a multi-person
emoji glyph. The fallback for a <abbr title="Zero-Width Joiner">ZWJ</abbr> multi-person group sequence is a
sequence of individual-person emoji glyphs.

A <abbr title="Zero-Width Joiner">ZWJ</abbr> multi-person group sequence must match:

```markdown
(_emoji_ | _emojimodified_) (_zwj_ (_emoji_ | _emojimodified_) _presentation_? ){1,3}
```

Only a fixed number of such multi-person group sequences is
defined. Some of the sequences make use of specific codepoints (such
as <samp>"Heavy Black Heart"</samp> or <samp>"Kiss Mark"</samp>).

The currently supported configurations for multi-person group emoji
sequences are:
  
  - Couple with heart
  - Couple in kiss
  - Couple holding hands 
  - Family 
  - Shaking hands


A potential source of confusion for these sequences is that some of
them appear to duplicate the content of an existing emoji codepoint,
but the existing emoji codepoint is typically not involved in forming
the corresponding <abbr title="Zero-Width Joiner">ZWJ</abbr> multi-person group sequence.

Specifically, there are standalone emoji codepoints for "Kiss"
(`U+1F48F`), two people holding hands (in three permutations:
`U+1F46B`, `U+1F46C`, and `U+1F46D`), "Family" (`U+1F46A`), and
"Handshake" (`U+1F91D`). The details of each of these codepoints in
relation to the corresponding conceptually-similar <abbr title="Zero-Width Joiner">ZWJ</abbr> multi-person
group sequence are noted below.

Each of the specific <abbr title="Zero-Width Joiner">ZWJ</abbr> multi-person group sequences has a precise
definition.

The "Couple with heart" sequence is composed of
<samp>"Person,ZWJ,Heavy_Black_Heart,VS16,ZWJ,Person"</samp>, and must match:
```markdown
(_emoji_ | _emojimodified_) _zwj_ `U+2764` _VS16_ _zwj_ (_emoji_ | _emojimodified_)
```

:::{figure-md}
![ZWJ multi-person heart sequence](/images/emoji/multi-person-heart-sequence.png "ZWJ multi-person heart sequence")

ZWJ multi-person heart sequence
:::


:::{figure-md}
![ZWJ multi-person heart sequence with modifier](/images/emoji/multi-person-heart-skintone-sequence.png "ZWJ multi-person heart sequence with modifier")

ZWJ multi-person heart sequence with modifier
:::


The "Couple in kiss" sequence is composed of
<samp>"Person,ZWJ,Heavy_Black_Heart,VS16,ZWJ,Kiss_Mark,ZWJ,Person"</samp>, and must match:
```markdown
(_emoji_ | _emojimodified_) _zwj_ `U+2764` _VS16_ _zwj_ `U+1F48B` _zwj_ (_emoji_ | _emojimodified_)
```

:::{figure-md}
![ZWJ multi-person kiss sequence](/images/emoji/multi-person-kiss-sequence.png "ZWJ multi-person kiss sequence")

ZWJ multi-person kiss sequence
:::


:::{figure-md}
![ZWJ multi-person kiss sequence with modifier](/images/emoji/multi-person-kiss-skintone-sequence.png "ZWJ multi-person kiss sequence with modifier")

ZWJ multi-person kiss sequence with modifier
:::


> Note: the kiss <abbr title="Zero-Width Joiner">ZWJ</abbr> sequence does not involve the "Kiss" codepoint
> (`U+1F48F`). 


The "Couple holding hands" sequence is composed of
<samp>"Person,ZWJ,Handshake,ZWJ,Person"</samp>, and must match:
```markdown
(_emoji_ | _emojimodified_) _zwj_ `U+1F91D` _zwj_ (_emoji_ | _emojimodified_)
```

:::{figure-md}
![ZWJ multi-person holding-hands sequence](/images/emoji/multi-person-holding-hands-sequence.png "ZWJ multi-person holding-hands sequence")

ZWJ multi-person holding-hands sequence
:::


:::{figure-md}
![ZWJ multi-person holding-hands sequence with modifier](/images/emoji/multi-person-holding-hands-skintone-sequence.png "ZWJ multi-person holding-hands sequence with modifier")

ZWJ multi-person holding-hands sequence with modifier
:::


> Note: the couple-holding-hands <abbr title="Zero-Width Joiner">ZWJ</abbr> sequence does not involve any of
> the "Man and woman holding hands" (`U+1F46B`), "Two men holding
> hands" (`U+1F46C`), or "Two women holding hands" (`U+1F46D`)
> codepoints. 


The "Family" sequence is composed of two-to-four individual <samp>"Person"</samp>
subsequences, each separated by a <abbr title="Zero-Width Joiner">ZWJ</abbr>. Furthermore, the <samp>"Person"</samp>
subsequences must be sorted so that all adult subsequences precede all
child subsequences. A <samp>"Family"</samp> sequence must match:
```markdown
(_emoji_ | _emojimodified_) (_zwj_ (_emoji_ | _emojimodified_) ){1,3}
```

> Note: The <abbr title="Zero-Width Joiner">ZWJ</abbr> "Family" sequence is defined to support modifiers on
> each individual human-codepoint component of the sequence, but these
> modified "Family" sequences are not currently included in the
> Recommended For General Interchange (<abbr>RGI</abbr>) emoji set, due to the
> number of permutations that would be added to the <abbr title="Recommended for General Interchange">RGI</abbr> set as a result.

:::{figure-md}
![ZWJ multi-person family man, boy sequence](/images/emoji/multi-person-family-man-boy-sequence.png "ZWJ multi-person family man, boy sequence")

ZWJ multi-person family sequence "man, boy"
:::

:::{figure-md}
![ZWJ multi-person family man, girl, girl sequence](/images/emoji/multi-person-family-man-girl-girl-sequence.png "ZWJ multi-person family man, girl, girl sequence")

ZWJ multi-person family sequence "man, girl, girl"
:::

:::{figure-md}
![ZWJ multi-person family man, woman, girl sequence](/images/emoji/multi-person-family-man-woman-girl-sequence.png "ZWJ multi-person family man, woman, girl sequence")

ZWJ multi-person family sequence "man, woman, girl"
:::

:::{figure-md}
![ZWJ multi-person family woman, woman, girl, boy sequence](/images/emoji/multi-person-family-woman-woman-girl-boy-sequence.png "ZWJ multi-person family woman, woman, girl, boy sequence")

ZWJ multi-person family sequence "woman, woman, girl, boy"
:::


> Note: the family <abbr title="Zero-Width Joiner">ZWJ</abbr> sequence does not involve the "Family"
> codepoint (`U+1F46A`).

The "Shaking hands" sequence is composed of two <samp>"Hand"</samp> subsequences
separated by a <abbr title="Zero-Width Joiner">ZWJ</abbr>, and must match:
```markdown
`U+1FAF1` _modifier_ _zwj_ `U+1FAF2` _modifier_
```

:::{figure-md}
![ZWJ multi-person shaking-hands sequence](/images/emoji/multi-person-shaking-hands-sequence.png "ZWJ multi-person shaking-hands sequence")

ZWJ multi-person shaking-hands sequence
:::


> Note: the <abbr title="Zero-Width Joiner">ZWJ</abbr> "Shaking hands" sequence does not involve the "Handshake"
> codepoint (`U+1F91D`), although the "Handshake" codepoint itself can
> be followed by a single _modifier_ codepoint that, for legacy
> reasons, serves to alter the skin tone of both of the hands depicted
> in the handshake.
>
> However, the "Handshake" codepoint _is_ utilized in the multi-person
> group sequence for "Couple holding hands".


#### <abbr>ZWJ</abbr> role sequences ####

A <abbr title="Zero-Width Joiner">ZWJ</abbr> role (or profession) sequence is used to request an emoji
depicting a human being performing a task or job. Role sequences are
composed of a codepoint representing a human, followed by <samp>"ZWJ"</samp>,
followed by an emoji depicting an object or symbol that references the
desired profession or role.

:::{figure-md}
![ZWJ role sequence firefighter](/images/emoji/role-sequence-firefighter.png "ZWJ role sequence 'firefighter'")

ZWJ role sequence 'firefighter'
:::


Optionally, the sequence can be further updated by requesting a
skin-tone modifier appended to the `_genderperson_` element.

:::{figure-md}
![ZWJ role sequence firefighter with modifier](/images/emoji/role-sequence-firefighter-skintone-6.png "ZWJ role sequence 'firefighter' with modifier")

ZWJ role sequence 'firefighter' with modifier
:::


In some cases, the object or symbol depicted by the standalone emoji
will not be shown in the substituted emoji resulting from the
sequence. For example, the "factory" codepoint (`U+1F3ED`) depicts a
building in its standalone emoji, but the "factory worker" sequence
depicts a human being outfitted for factory work, rather than
depicting a combination of the human being and the factory building.

In addition, some of the supported "role" codepoints do not use emoji
presentation by default; for those codepoints, the emoji will be
followed by a presentation selector.

:::{figure-md}
![ZWJ role sequence pilot](/images/emoji/role-sequence-pilot.png "ZWJ role sequence 'pilot'")

ZWJ role sequence 'pilot'
:::


:::{figure-md}
![ZWJ role sequence pilot with modifier](/images/emoji/role-sequence-pilot-skintone-2.png "ZWJ role sequence 'pilot' with modifier")

ZWJ role sequence 'pilot' with modifier
:::


The fallback for a <abbr title="Zero-Width Joiner">ZWJ</abbr> role sequence is a generic "person" emoji
followed by the emoji symbolizing the task or job.

A <abbr title="Zero-Width Joiner">ZWJ</abbr> role sequence must match:

```markdown
_genderperson_ _modifier_? _zwj_ _emoji_ _presentation_?
```

> Note: The <abbr title="Zero-Width Joiner">ZWJ</abbr> role sequence is not to be confused with the <abbr title="Zero-Width Joiner">ZWJ</abbr>
> gendered person sequence.
>
> In effect, both sequence types can be used to depict a human being
> performing a task or activity, and can be used to request a specific
> gender for the human being depicted.
>
> However, all of the codepoints covered by the <abbr title="Zero-Width Joiner">ZWJ</abbr> gendered person
> sequences are emoji that show a human being by default, whereas the
> codepoints covered by the <abbr title="Zero-Width Joiner">ZWJ</abbr> role sequences begin with a generic
> human-being emoji and append a symbol or object emoji.


#### <abbr>ZWJ</abbr> color sequences ####

A <abbr title="Zero-Width Joiner">ZWJ</abbr> color sequence is used to request a version of an emoji
codepoint depicting the base object in a specific color.

A <abbr title="Zero-Width Joiner">ZWJ</abbr> color sequence must match:

```markdown
_emoji_ _zwj_ _color_ _presentation_
```

Currently, nine codepoints are defined, each of which (in isolation)
depicts a large square filled with the color in question.

  - `U+2B1B` - "Black Large Square"
  - `U+2B1C` - "White Large Square"
  - `U+1F7E5` - "Large Red Square"
  - `U+1F7E6` - "Large Blue Square"
  - `U+1F7E7` - "Large Orange Square"
  - `U+1F7E8` - "Large Yellow Square"
  - `U+1F7E9` - "Large Green Square"
  - `U+1F7EA` - "Large Purple Square"
  - `U+1F7EB` - "Large Brown Square"


:::{figure-md}
![ZWJ color sequence](/images/emoji/color-sequence.png "ZWJ color sequence")

ZWJ color sequence
:::


The fallback for a <abbr title="Zero-Width Joiner">ZWJ</abbr> color sequence is the default emoji followed by
the default emoji for the color codepoint (that is, the color square).


#### <abbr>ZWJ</abbr> directionality sequences ####

A <abbr title="Zero-Width Joiner">ZWJ</abbr> directionality sequence is used to request a version of an emoji
codepoint facing a specific cardinal direction.


:::{figure-md}
![ZWJ directionality sequence](/images/emoji/zwj-directionality-sequence.png "ZWJ directionality sequence")

ZWJ directionality sequence
:::


A <abbr title="Zero-Width Joiner">ZWJ</abbr> directionality sequence must match:

```markdown
_emoji_ _zwj_ _direction_ _presentation_
```

#### <abbr>ZWJ</abbr> additional sequences ####

In addition to the above <abbr title="Zero-Width Joiner">ZWJ</abbr> sequence categories, there are 13
standalone, but uncategorized, <abbr title="Zero-Width Joiner">ZWJ</abbr> sequences defined in Unicode.

  - "Heart on fire"
  - "Mending heart"
  - "Transgender flag"
  - "Rainbow flag"
  - "Pirate flag"
  - "Service dog"
  - "Polar bear"
  - "Eye in speech bubble"
  - "Face exhaling"
  - "Face with spiral eyes"
  - "Face in clouds"
  - "Mx Claus"
  - "Black cat"


:::{figure-md}
![ZWJ heart-on-fire sequence](/images/emoji/zwj-sequence-heart-on-fire.png "ZWJ heart-on-fire sequence")

ZWJ heart-on-fire sequence
:::


These sequences currently match:

```markdown
_emoji_ _presentation_? _zwj_ _emoji_ _presentation_?
```

Note that the "Black cat" sequence, although it appears on this list
of additional <abbr title="Zero-Width Joiner">ZWJ</abbr> sequences, has subsequently been generalized to the
[ZWJ color sequence mechanism](#zwj-color-sequences).


### Other sequences and ligatures ###

Emoji fonts may include support for additional variants and
sequences. For example, an emoji font might implement support for
"keycap"-style emoji for alphabetical characters in addition to the
numbers and symbols defined above.

Emoji fonts may also include many-to-one emoji substitutions that do
not fit into any of the above sequence varieties and, instead, behave
more like ligatures. For example, the sequence <samp>"Ice Cream, Banana"</samp>
might be substituted with a "banana split" emoji.

Any such substitutions are included by the emoji font vendor at their
own discretion, with the understanding that fallback behavior is
unpredictable.

In all such cases, the shaping engine can make a best-effort attempt
to support the sequences, but is not obligated to provide any
guarantees as to their correctness.


## Feature interaction in sequences ##

As is noted in the descriptions of <abbr title="Zero-Width Joiner">ZWJ</abbr> gendered-person sequences and
<abbr title="Zero-Width Joiner">ZWJ</abbr> multi-person group sequences, there is potential for confusion
wherever standalone emoji codepoints and emoji sequences overlap in
meaning.

This potential for confusion is compounded by the fact that the
skin-tone modifier mechanism and the <abbr title="Zero-Width Joiner">ZWJ</abbr> gendered person mechanism
interact differently with standalone emoji codepoints and with emoji
sequences.

In particular, for several of the standalone emoji codepoints, a
single skin-tone modifier is permitted, which is defined to modify both
of the human beings depicted in the emoji. For other standalone emoji
codepoints, only a single gender designator <abbr title="Zero-Width Joiner">ZWJ</abbr> gendered-person
subsequence is allowed to be appended to the codepoint, and the gender
designator is defined to modify both of the human beings depicted in
the emoji.

The permitted combinations are summarized in the following table:


:::{table} Defined interactions between skin-tone–modifiers and gender designators

| Type       | Emoji                                   | Skin-tone-modifier | Gender depicted     |
|:-----------|:----------------------------------------|:-------------------|:--------------------|
| Standalone | "Handshake" `U+1F91D`                   | only one supported |    not supported    |
| Standalone | "Woman with bunny ears" `U+1F46F`       |   not supported    | only one supported  |
| Standalone | "Wrestlers" `U+1F93C`                   |   not supported    | only one supported  |
| Standalone | "Man and woman holding hands" `U+1F46B` | only one supported |    not supported    |
| Standalone | "Two men holding hands" `U+1F46C`       | only one supported |    not supported    |
| Standalone | "Two women holding hands" `U+1F46D`     | only one supported |    not supported    |
| Standalone | "Kiss" `U+1F48F`                        | only one supported |    not supported    |
| Standalone | "Couple with heart" `U+1F491`           | only one supported |    not supported    |
| Standalone | "Family" `U+1F46A`                      |   not supported    |    not supported    |
| Sequence   | "Couple with heart" ZWJ sequence        |     supported      |      supported      |
| Sequence   | "Couple in kiss" ZWJ sequence           |     supported      |      supported      |
| Sequence   | "Couple holding hands" ZWJ sequence     |     supported      |      supported      |
| Sequence   | "Family" ZWJ sequence                   |     supported      |      supported      |
| Sequence   | "Shaking hands" ZWJ sequence            |     required       |    not supported    |
:::


## Emoji sets ##

Unicode defines several lists of emoji codepoints and emoji sequences
that constitute the sequences that are expected in general text.

The "Basic emoji" set includes all individual codepoints that can be
rendered with the emoji presentation style (including those codepoints
that do not default to emoji presentation).

The "Emoji keycap sequence" set includes all possible valid Keycap
sequences.

The "<abbr title="Recommended for General Interchange">RGI</abbr> emoji modifier sequence", "<abbr title="Recommended for General Interchange">RGI</abbr> emoji flag sequence", "<abbr title="Recommended for General Interchange">RGI</abbr>
emoji tag sequence", and "<abbr title="Recommended for General Interchange">RGI</abbr> emoji <abbr title="Zero-Width Joiner">ZWJ</abbr> sequence" sets each include
only a subset of the possible valid sequences for their respective
variety of sequence. These sets are designated as "Recommended for
General Interchange" (<abbr>RGI</abbr>) to denote that they are in common usage.

Finally, the "<abbr title="Recommended for General Interchange">RGI</abbr> emoji set" includes all of the codepoints and
sequences included in the preceding sets. Presence in the <abbr title="Recommended for General Interchange">RGI</abbr> emoji
set can be tracked with the `RGI_Emoji` property in the <abbr title="Unicode Character Database">UCD</abbr>. Fonts are
not required to implement the entire <abbr title="Recommended for General Interchange">RGI</abbr> emoji set, nor any of the
other sets.


## The default shaping model ##

Emoji should be shaped using the
[default](opentype-shaping-default.md) shaping model. 

Processing a run of text in the default shaping model involves three
top-level stages:

1. Applying the basic substitution features from <abbr>GSUB</abbr>
2. Applying other substitution features from <abbr>GSUB</abbr>
3. Applying the positioning features from <abbr>GPOS</abbr>

Emoji sequences as described above will generally be implemented in
the active font as a <abbr title="Glyph Substitution table">GSUB</abbr> lookup feature. However, there are no
specific <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features that must or must _not_ be
employed for this purpose.

Consequently, shaping engines should not assume (for example) that
emoji sequences will be implemented in any specific feature of <abbr title="Glyph Substitution table">GSUB</abbr>.

A font may also employ contextual features, such as `locl`, that
affect the emoji glyph shown, or use <abbr title="Glyph Positioning table">GPOS</abbr> positioning for some emoji
glyphs. 


### Font substitution for presentation forms ###

Before shaping begins, the rendering engine should analyze the text
run and identify presentation forms.

A presentation sequence is used to request a specific presentation
style ("text" or "emoji") for a codepoint, potentially overriding the
default presentation style that is defined in Unicode for that
codepoint.

Because it is uncommon for a single font to include both an
emoji-presentation-style glyph and a text-presentation-style glyph for
the same codepoint, handling a presentation sequence might
require font substitution.

> Note: Strictly speaking, font substitution is not part of the
> shaping process, and the handling of missing presentation forms
> might be most easily performed during segmentation of the text
> stream into runs. However, shaping-engine implementers should be
> aware that such presentation-sequence substitutions are allowable
> and handle them gracefully.


### 1. Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	locl
	ccmp
	rlig
	

An emoji font can implement sequence support through any <abbr title="Glyph Substitution table">GSUB</abbr> feature
lookup.

Basic substitution features are a common choice for emoji fonts and should
be applied at this stage. In particular, <abbr title="Glyph Substitution table">GSUB</abbr> features that are
enabled by default and <abbr title="Glyph Substitution table">GSUB</abbr> features that cannot be disabled by
application-level user interfaces are common choices in which the
active font may implement emoji substitutions.

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

In other, non-emoji text runs, the `ccmp` feature allows a font to
substitute mark-and-base sequences with a pre-composed glyph including
the mark and the base, or to substitute a single glyph into an
equivalent decomposed sequence of glyphs. 

If present, these composition and decomposition substitutions must be
performed before applying any other <abbr title="Glyph Substitution table">GSUB</abbr> lookups, because
those lookups may be written to match only the `ccmp`-substituted
glyphs.

The `rlig` feature substitutes glyph sequences with mandatory
ligatures. Substitutions made by `rlig` cannot be disabled by
application-level user interfaces.

The basic substitution features play a relatively more important role
in shaping non-emoji text runs; therefore the shaping engine may
apply some of them (such as `locl`) at an earlier stage in the
shaping process. Emoji shaping should be unaffected by this decision.


### 2. Applying typographic substitution features from <abbr>GSUB</abbr> ###

The typographic-substitution phase applies all remaining substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for
this stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

These substitutions include those features designed to provide
typographic consistency and correctness.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

An emoji font can implement sequence support through any <abbr title="Glyph Substitution table">GSUB</abbr> feature
lookup. This can include any other substitution feature in the <abbr title="Glyph Substitution table">GSUB</abbr>
feature table.

Support for <abbr title="Recommended for General Interchange">RGI</abbr> emoji sequences or other emoji sequences defined as
valid in Unicode may be implemented in a feature that is enabled by
default and cannot be disabled by application-level user interfaces,
such as the `rlig` feature (for "required ligatures").

However, emoji fonts may also include support for emoji sequences in
<abbr title="Glyph Substitution table">GSUB</abbr> features that can be disabled by application-level user
interfaces, such as the `liga` feature (for standard ligatures). Emoji
sequences may also be implemented in features that are disabled by
default, such as the `dlig` feature (for "discretionary ligatures").

An emoji font might also implement support for emoji sequences through
the use of multiple features. For example, <abbr title="Recommended for General Interchange">RGI</abbr> emoji sequences or
other emoji sequences defined as valid in Unicode may be implemented
in `rlig`, with custom sequences implemented in `liga`.


### 3. Applying the positioning features from <abbr>GPOS</abbr> ###

The positioning stage adjusts the positions of mark and base
glyphs. In preparation for this stage, glyph sequences should be
tagged for possible application of <abbr title="Glyph Positioning table">GPOS</abbr> features.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table
in the font.

In general, all emoji glyphs in a given font are expected to be
approximately equal in height and width, and the usage of <abbr title="Glyph Positioning table">GPOS</abbr>
positioning for emoji is uncommon.

However, some emoji glyphs might be narrower or wider than average by
the nature of the image itself (for example, certain national flags
are narrower or wider than others), and there may be situations in
which the active font alters the default position of an emoji glyph to
achieve a consistent alignment, spacing, or appearance.

Therefore, shaping engines should make no assumptions about the
presence or absence of <abbr title="Glyph Positioning table">GPOS</abbr> features for emoji runs, and should apply
the features if present.

<!---

FALLBACK ??

modifiers = skintone
- fallback recommended to shown skintone patch, even though modifier
  not normally visible codepoint??

emoji sequences 
- Regional Indicators Symbols: 
  - might produce flags OR as a 'country code'
  - limited to predefined list; has some deprecated code (eg, east germany)
  - HB trickiness:
    https://github.com/harfbuzz/harfbuzz/commit/2b0ced28b685de4edbd22cf5f59be30075984dfb
	https://github.com/harfbuzz/harfbuzz/issues/2265

--->


================================================
FILE: opentype-shaping-gujarati.md
================================================
```{include} /_global.md
```

# Gujarati shaping in OpenType #

This document details the shaping procedure needed to display text
runs in the Gujarati script.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes and subclasses](#shaping-classes-and-subclasses)
      - [Gujarati character tables](#gujarati-character-tables)
  - [The `<gjr2>` shaping model](#the-gjr2-shaping-model)
      - [Stage 1: Identifying syllables and other sequences](#stage-1-identifying-syllables-and-other-sequences)
      - [Stage 2: Initial reordering](#stage-2-initial-reordering)
      - [Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr>](#stage-3-applying-the-basic-substitution-features-from-gsub)
      - [Stage 4: Final reordering](#stage-4-final-reordering)
      - [Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr>](#stage-5-applying-all-remaining-substitution-features-from-gsub)
      - [Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr>](#stage-6-applying-remaining-positioning-features-from-gpos)
  - [The `<gujr>` shaping model](#the-gujr-shaping-model)
      - [Distinctions from `<gjr2>`](#distinctions-from-gjr2)
      - [Advice for handling fonts with `<gujr>` features only](#advice-for-handling-fonts-with-gujr-features-only)
      - [Advice for handling text runs composed in `<gujr>` format](#advice-for-handling-text-runs-composed-in-gujr-format)


## General information ##

The Gujarati script belongs to the Indic family, and follows
the same general patterns as the other Indic scripts. More
specifically, it belongs to the North Indic subgroup, in which
sequences of adjacent consonants are often represented as conjuncts.

The Gujarati script is used to write multiple languages, most commonly
Gujarati, Kutchi, and Avestan. In addition, Sanskrit may be written
in Gujarati, so Gujarati script runs may include glyphs from the Vedic
Extensions block of Unicode. 

There are two extant Gujarati script tags defined in OpenType, `<gujr>`
and `<gjr2>`. The older script tag, `<gujr>`, was deprecated in 2005.
Therefore, new fonts should be engineered to work with the `<gjr2>`
shaping model. However, if a font is encountered that supports only
`<gujr>`, the shaping engine should deal with it gracefully.

## Terminology ##

OpenType shaping uses a standard set of terms for Indic scripts.  The
terms used colloquially in any particular language may vary, however,
potentially causing confusion.

**Matra** is the standard term for a dependent vowel sign. 

**Halant** and **Virama** are both standard terms for the below-base "vowel-killer"
sign. Unicode documents use the term "virama" most frequently, while
OpenType documents use the term "halant" most frequently. 

**Chandrabindu** (or simply **Bindu**) is the standard term for the diacritical mark
indicating that the preceding vowel should be nasalized. 

The term **base consonant** is also critical to Indic shaping. The
base consonant of a syllable is the consonant that carries the
syllable's vowel sound, either the inherent vowel (for an unmarked
base consonant) or a dependent vowel (with the addition of a matra).

A syllable's base consonant is generally rendered in its full form
(although it may form ligatures), while other consonants in the
syllable frequently take on secondary forms. Different <abbr title="Glyph Substitution table">GSUB</abbr>
substitutions may apply to a script's **pre-base** and **post-base**
consonants. Some of these substitutions create **above-base** or
**below-base** forms. The **Reph** form of the consonant "Ra" is an
example.

Syllables may also begin with an **independent vowel** instead of a
consonant. In these syllables, the independent vowel is rendered in
full-letter form, not as a matra, and the independent vowel serves as the
syllable base, similar to a base consonant.

Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.

## Glyph classification ##

Shaping Gujarati text depends on the shaping engine correctly
classifying each glyph in the run. As with most other scripts, the
classifications must distinguish between consonants, vowels
(independent and dependent), numerals, punctuation, and various types
of diacritical mark.

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not sufficient to fully capture the
expected shaping behavior (such as glyph reordering). Therefore,
Gujarati glyphs must additionally be classified by how they are treated
when shaping a run of text.

### Shaping classes and subclasses ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Indic scripts. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Several of the diacritic and syllable-modifying marks behave according
to their own rules and, thus, have a special class. These include
`BINDU`, `VISARGA`, `AVAGRAHA`, `NUKTA`, and `VIRAMA`. Some
less-common marks behave according to rules that are similar to these
common marks, and are therefore classified with the corresponding
common mark. The Vedic Extensions also include a `CANTILLATION`
class for tone marks.

Letters generally fall into the classes `CONSONANT`,
`VOWEL_INDEPENDENT`, and `VOWEL_DEPENDENT`. These classes help the
shaping engine parse and identify key positions in a syllable. For
example, Unicode categorizes dependent vowels as `Mark [Mn]`, but the
shaping engine must be able to distinguish between dependent vowels
and diacritical marks (which are also categorized as `Mark [Mn]`).

Other characters, such as symbols and miscellaneous letters (for
example, letter-like symbols that only occur as standalone entities
and do not occur within syllables), need no special attention from the
shaping engine, so they are not assigned a shaping class.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType features that
might affect how the respective glyphs are drawn, such as `tnum`,
which specifies the usage of tabular-width numerals, and `sups`, which
replaces the default glyphs with superscript variants.

Marks and dependent vowels are further labeled with a mark-placement
subclass, which indicates where the glyph will be placed with respect
to the base character to which it is attached. The actual position of
the glyphs is determined by the lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr>
table; however, the shaping rules for Indic scripts require that the
shaping engine be able to identify marks by their general
position. 

For example, left-side dependent vowels (matras), classified
with `LEFT_POSITION`, must frequently be reordered, with the final
position determined by whether or not other letters in the syllable
have formed ligatures or combined into conjunct forms. Therefore, the
`LEFT_POSITION` subclass of the character must be tracked throughout
the shaping process.

There are four basic _mark-placement subclasses_ for dependent vowels
(matras). Each corresponds to the visual position of the matra with
respect to the syllable base to which it is attached:

  - `LEFT_POSITION` matras are positioned to the left of the syllable base.
  - `RIGHT_POSITION` matras are positioned to the right of the syllable base.
  - `TOP_POSITION` matras are positioned above the syllable base.
  - `BOTTOM_POSITION` matras are positioned below the syllable base.
  
These positions may also be referred to elsewhere in shaping documents as:

  - _Pre-base_ matras
  - _Post-base_ matras
  - _Above-base_ matras
  - _Below-base_ matras
  
respectively. The `LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

In addition, dependent-vowel codepoints that are composed of multiple
components will be designated in character tables as having a compound
_mark-placement subclass_, such as `TOP_AND_RIGHT` or `LEFT_AND_RIGHT`. 

However, these multi-part matras are decomposed into separate matra
components during the shaping process. After the decomposition, each
matra component will belong to exactly one of the four basic
_mark-placement subclasses_.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there are some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 

### Gujarati character tables ###

Separate character tables are provided for the Gujarati and Vedic
Extensions blocks as well as for other miscellaneous characters that
are used in `<gjr2>` text runs:

  - [Gujarati character table](character-tables/character-tables-gujarati.md#gujarati-character-table)
  - [Vedic Extensions character table](character-tables/character-tables-gujarati.md#vedic-extensions-character-table)
  - [Miscellaneous character table](character-tables/character-tables-gujarati.md#miscellaneous-character-table)

The tables list each codepoint along with its Unicode general
category, its shaping class, and its mark-placement subclass. The
codepoint's Unicode name and an example glyph are also provided.

For example:

:::{table} example character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0A81`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0A81; Candrabindu         |
| | | | |
|`U+0A95`   | Letter           | CONSONANT         | _null_                     | &#x0A95; Ka                  |
:::


Codepoints with no assigned meaning are designated as _unassigned_ in
the _Unicode category_ column.

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the tables use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of Gujarati text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (such as <samp>"Reph"</samp>)


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a <samp>"_Consonant_,Halant,_Consonant_"</samp> sequence.

  - The sequence <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> blocks the
    formation of a conjunct between the two consonants. 

Note, however, that the <samp>"_Consonant_,Halant"</samp> subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead.

  - The sequence <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> should produce
    the first consonant in its standard form, followed by an explicit
    <samp>"Halant"</samp>.

A secondary usage of the zero-width joiner is to prevent the formation of
<samp>"Reph"</samp>.

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence should not produce a <samp>"Reph"</samp>,
    even where an initial <samp>"Ra,Halant"</samp> sequence without the zero-width
    joiner would otherwise produce a <samp>"Reph"</samp>.

The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.

For example:

  - A lookup that substitutes an alternate version of a
    dependent-vowel (matra) glyph when it is preceded by <samp>"Ka,Halant,Tta"</samp>
    should still be applied if the dependent-vowel codepoint is preceded
    by <samp>"Ka,Halant,ZWJ,Tta"</samp> in the text run.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match <samp>"NBSP,ZWJ,Halant,_Consonant_"</samp>, <samp>"NBSP,_mark_"</samp>, or <samp>"NBSP,_matra_"</samp>.


## The `<gjr2>` shaping model ##

Processing a run of `<gjr2>` text involves six top-level stages:

1. Identifying syllables and other sequences
2. Initial reordering
3. Applying the basic substitution features from <abbr>GSUB</abbr>
4. Final reordering
5. Applying all remaining substitution features from <abbr>GSUB</abbr>
6. Applying all remaining positioning features from <abbr>GPOS</abbr>


As with other Indic scripts, the initial reordering stage and the
final reordering stage each involve applying a set of several
script-specific rules. The basic substitution features must be applied
to the run in a specific order. The remaining substitution features in
stage five, however, do not have a mandatory order.

Indic scripts follow many of the same shaping patterns, but they
differ in a few critical characteristics that the shaping engine must
track. These include:

  - The position of the base consonant in a syllable.
  
  - The final position of <samp>"Reph"</samp>.
  
  - Whether <samp>"Reph"</samp> must be requested explicitly or if it is formed by
    a specific, implicit sequence.
	
  - Whether the below-base forms feature is applied only to consonants
    before the syllable base, only to consonants after the base
    consonant, or to both.
	
  - The ordering positions for dependent vowels
    (matras). Specifically, right-side, above-base, and below-base
    matras follow different rules in different scripts. 
	All Indic scripts position left-side matras in the same
    manner, in the ordering position `POS_PREBASE_MATRA`. 

With regard to these common variations, Gujarati's specific shaping
characteristics include: 

  - `BASE_POS_LAST` = The base consonant of a syllable is the last
     consonant, not counting any special final-consonant forms.

  - `REPH_POS_BEFORE_POST` = <samp>"Reph"</samp> is ordered before all post-base consonant forms.

  - `REPH_MODE_IMPLICIT` = <samp>"Reph"</samp> is formed by an initial <samp>"Ra,Halant"</samp> sequence.

  - `BLWF_MODE_PRE_AND_POST` = The below-base forms feature is applied both to
     pre-base consonants and to post-base consonants.

  - `MATRA_POS_TOP` = `POS_AFTER_SUBJOINED` = Above-base matras are
     ordered after subjoined (i.e., below-base) consonant forms. 

  - `MATRA_POS_RIGHT` = `POS_AFTER_POST` = Right-side matras are
     ordered after all post-base consonant forms. 

  - `MATRA_POS_BOTTOM` = `POS_AFTER_POST` = Below-base matras are
     ordered after all post-base consonant forms.

These characteristics determine how the shaping engine must reorder
certain glyphs, how base consonants are determined, and how <samp>"Reph"</samp>
should be encoded within a run of text.

### Stage 1: Identifying syllables and other sequences ###

A syllable in Gujarati consists of a valid orthographic sequence
that may be followed by a "tail" of modifier signs. 

> Note: The Gujarati Unicode block enumerates nine modifier signs,
> "Candrabindu" (`U+0A81`), "Anusvara" (`U+0A82`), "Visarga"
> (`U+0A83`), "Sukun" (`U+0AFA`), "Shadda" (`U+0AFB`), "Maddah"
> (`U+0AFC`), "Three-Dot Nukta Above" (`U+0AFD`), "Circle Nukta Above"
> (`U+0AFE`), and "Two-Circle Nukta Above" (`U+0AFF`). In addition,
> Sanskrit text written in Gujarati may include additional signs from
> the Vedic Extensions block.

Each syllable contains exactly one vowel sound. Valid syllables may
begin with either a consonant or an independent vowel. 

If the syllable begins with a consonant, then the consonant that
provides the vowel sound is referred to as the "base" consonant. If
the syllable begins with an independent vowel, that vowel is the
syllable's only vowel sound and, by definition, there is no "base"
consonant. 

> Note: A consonant that is not accompanied by a dependent vowel (matra) sign
> carries the script's inherent vowel sound. This vowel sound is changed
> by a dependent vowel (matra) sign following the consonant.

From the shaping engine's perspective, the main distinction between a
syllable with a base consonant and a syllable with an
independent-vowel base is that a syllable with an independent-vowel
base is less likely to include additional consonants in special forms
and less likely to include dependent vowel signs
(matras). Therefore, in the common case, vowel-based syllables may
involve less reordering, substitution feature applications, and other
processing than consonant-based syllables.

In some languages and orthographies, vowel-based syllables are
not permitted to include additional consonants or matras, and certain
<abbr title="Glyph Substitution table">GSUB</abbr> substitution features do not occur. However, there are often
known exceptions, and real-world text makes no such guarantees. 

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

Generally speaking, the base consonant is the final consonant of the
syllable and its vowel sound designates the end of the syllable. This
rule is synonymous with the `BASE_POS_LAST` characteristic mentioned
earlier. 

Valid consonant-based syllables may include one or more additional 
consonants that precede the base consonant or syllable base. Each of these
other, pre-base consonants will be followed by the <samp>"Halant"</samp> mark, which
indicates that they carry no vowel. They affect pronunciation by
combining with the base consonant (e.g., "_str_", "_pl_") but they
do not add a vowel sound.

As with other Indic scripts, the consonant <samp>"Ra"</samp> receives special
treatment; in many circumstances it is replaced by one of two combining
mark-like forms. 

  - A <samp>"Ra,Halant"</samp> sequence at the beginning of a syllable
    is replaced with an above-base mark called <samp>"Reph"</samp> (unless the <samp>"Ra"</samp>
    is the only consonant in the syllable). 
    This rule is synonymous with the `REPH_MODE_IMPLICIT`
    characteristic mentioned earlier.

  - <samp>"Halant,Ra"</samp> sequences that occur elsewhere in the syllable may take on the
    below-base form <samp>"Rakaar"</samp>.
	
<samp>"Reph"</samp> and <samp>"Rakaar"</samp> characters must be reordered after the
syllable-identification stage is complete. 

> Note: Generally speaking, OpenType fonts will implement support for
> any below-base, post-base, and pre-base-reordering consonant forms
> by including the necessary substitution rules in their `blwf`,
> `pstf`, and `pref` lookups in <abbr title="Glyph Substitution table">GSUB</abbr>.
>
> Consequently, whenever shaping engines need to determine whether or 
> not a given consonant can take on such a special form, the most
> appropriate test is to check if the consonant is included in the
> relevant <abbr title="Glyph Substitution table">GSUB</abbr> lookup. Other implementations are possible, such as
> maintaining static tables of consonants, but checking for <abbr title="Glyph Substitution table">GSUB</abbr>
> support ensures that the expected behavior is implemented in the
> active font, and is therefore the most reliable approach.


In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.

> Note: Foreign loanwords, when written in the Gujarati script, may
> not adhere to the syllable-formation rules described above. In
> particular, it is not uncommon to encounter foreign loanwords that
> contain a word-final suffix of consonants.
>
> Nevertheless, such word-final suffixes will be correctly matched by
> the regular expressions listed below. These loanwords are pronounced
> differently, which raises issues for potential readers, but the
> character sequences do not affect the shaping process.


Syllables should be identified by examining the run and matching
glyphs, based on their categorization, using regular expressions. 

The following general-purpose Indic-shaping regular expressions can be
used to match Gujarati syllables. 

The regular expressions utilize the shaping classes from the tables
above. For the purpose of syllable identification, more general
classes can be used, as defined in the following table. This
simplifies the resulting expressions. 

```markdown
_ra_		= The consonant "Ra" 
_consonant_	= ( `CONSONANT` | `CONSONANT_DEAD` ) - _ra_
_vowel_		= `VOWEL_INDEPENDENT`
_nukta_	  	= `NUKTA`
_halant_	= `VIRAMA`
_zwj_		= `JOINER`
_zwnj_		= `NON_JOINER`
_matra_		= `VOWEL_DEPENDENT` | `PURE_KILLER`
_syllablemodifier_	= `SYLLABLE_MODIFIER` | `BINDU` | `VISARGA` | `GEMINATION_MARK`
_vedicsign_	= `CANTILLATION`
_placeholder_	= `PLACEHOLDER` | `CONSONANT_PLACEHOLDER` | `NUMBER`
_dottedcircle_	= `DOTTED_CIRCLE`
_repha_		= `CONSONANT_PRE_REPHA`
_consonantmedial_	= `CONSONANT_MEDIAL`
_symbol_	= `SYMBOL` | `AVAGRAHA`
_consonantwithstacker_	= `CONSONANT_WITH_STACKER`
_other_		= `OTHER` | `MODIFYING_LETTER`
```


> Note: the _ra_ identification class is mutually exclusive with 
> the _consonant_ class. The union of the _consonant_ and _ra_ classes
> is used in the regular expression elements below in order to
> correctly identify <samp>"Ra"</samp> characters that do not trigger <samp>"Reph"</samp> or
> <samp>"Rakaar"</samp> shaping behavior.
>
> Note, also, that the cantillation mark "combining Ra" in the
> Devanagari Extended block does _not_ belong to the _ra_
> identification class, and that the other "combining consonant"
> cantillation marks in the Devanagari Extended block do not belong to
> the _consonant_ identification class.

> Note: The _placeholder_ identification class includes codepoints
> that are often used in place of vowels or consonants when a document
> needs to display a matra, mark, or special form in isolation or
> in another context beyond a standard syllable. Examples of
> _placeholder_ codepoints include hyphens and non-breaking
> spaces. Sequences that utilize this approach should be identified as
> "standalone" syllables.
>
> The _placeholder_ identification class also includes numerals, which
> are commonly used as word substitutes within normal text. Examples
> include ordinals (e.g., "4th").

> Note: The _other_ identification class includes codepoints that
> do not interact with adjacent characters for shaping purposes. Even
> though some of these codepoints (such as `MODIFYING_LETTER`) can
> occur within words, they evoke no behavior from the shaping
> engine and do not factor into the regular expressions that
> follow. Therefore, the shaping engine may choose to ignore them
> during syllable identification; they are listed here for completeness.

These identification classes form the bases of the following regular
expression elements:

```markdown
C	= _consonant_ | _ra_
Z	= _zwj_ | _zwnj_
REPH	= (_ra_ _halant_) | _repha_
CN		= C _zwj_? _nukta_?
FORCED_RAKAR	= _zwj_ _halant_ _zwj_ _ra_
S	= _symbol_ _nukta_?
MATRA_GROUP	= Z{0,3} _matra_ _nukta_? (_halant_ | FORCED_RAKAR)?
SYLLABLE_TAIL	= (Z? _syllablemodifier_ _syllablemodifier_? _zwnj_?)? _vedicsign_{0,3}
HALANT_GROUP	= Z? _halant_ (_zwj_ _nukta_?)?
FINAL_HALANT_GROUP	= HALANT_GROUP | (_halant_ _zwnj_)
MEDIAL_GROUP	= _consonantmedial_?
HALANT_OR_MATRA_GROUP	= (FINAL_HALANT_GROUP | MATRA_GROUP*)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(MATRA_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(MATRA_GROUP){0,4}` .


Using the above elements, the following regular expressions define the
possible syllable types:

A consonant-based syllable will match the expression:
```markdown
(_repha_|_consonantwithstacker_)? (CN HALANT_GROUP)* CN MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(CN HALANT_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(CN HALANT_GROUP){0,4}` .

A vowel-based syllable will match the expression:
```markdown
REPH? _vowel_ _nukta_? (_zwj_ | (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

A standalone syllable will match the expression:
```markdown
((_repha_|_consonantwithstacker_)? _placeholder_ | REPH? _dottedcircle_) _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

> Note: Although they are labeled as "standalone syllables" here,
> many sequences that match the standalone regular expression above
> are instances where a document needs to display a matra, combining
> mark, or special form in isolation. Such sequences might not have
> any significance with regard to the definition of syllables used in
> the language or orthography of the text.

A symbol-based syllable will match the expression:
```markdown
S SYLLABLE_TAIL
```

A broken syllable will match the expression:
```markdown
REPH? _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

The primary problem involved in shaping broken syllables is the lack
of a syllable base (either a base consonant or an independent
vowel). Without a syllable base, the shaping engine cannot perform
<abbr title="Glyph Positioning table">GPOS</abbr> positioning and other contextual operations that are required
later in the shaping process.

To make up for this limitation, shaping engines should insert a
dotted-circle placeholder (`U+25CC`) character into the text stream
where the missing syllable base was expected to occur. This
placeholder allows the shaping process to proceed on a best-effort
basis at handling the broken-syllable sequence, but making guarantees
about the orthographic correctness or preferred appearance of the
final result is out of scope for this document.

Shaping engines can perform this dotted-circle insertion at any point
after the broken syllable has been recognized and before <abbr title="Glyph Substitution table">GSUB</abbr> features
are applied. However, the best results will likely be attained by
performing the insertion immediately, before proceeding to
stage 2. This will enable the maximum number of <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features
in the active font to be correctly applied to the text run by ensuring
that all reordering, tagging, and sorting algorithms are executed as
usual.

> Note: In software stacks where other text-handling operations, such
> as Unicode normalization and localization, are performed before the
> text run is passed to the shaping engine, there is a potential for
> the dotted-circle insertion to cause unexpected effects.
>
> For example, if a `ccmp` or `locl` feature substitutes the default
> dotted-circle placeholder glyph with a variant glyph of a different
> size or weight for the (`U+25CC`) codepoint, then any shaping engine
> which relies on another software component to handle that
> functionality must take additional care to ensure consistency.


The expressions above use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


After the syllables have been identified, each of the subsequent 
shaping stages occurs on a per-syllable basis.

### Stage 2: Initial reordering ###

The initial reordering stage is used to relocate glyphs from the
phonetic order in which they occur in a run of text to the
orthographic order in which they are presented visually.

> Note: Primarily, this means moving dependent-vowel (matra) glyphs, 
> <samp>"Ra,Halant"</samp> glyph sequences, and other consonants that take special
> treatment in some circumstances. <samp>"Ra"</samp> may take on one of two special
> forms, depending on its position in the syllable. 
>
> These reordering moves are mandatory. The final-reordering stage
> may make additional moves, depending on the text and on the features
> implemented in the active font.

The syllable should be processed by tagging each glyph with its
intended position based on its ordering category. After all glyphs
have been tagged, the entire syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.

The final sort order of the ordering categories should be:


	POS_RA_TO_BECOME_REPH
	POS_PREBASE_MATRA
	POS_PREBASE_CONSONANT

	POS_SYLLABLE_BASE
	POS_AFTER_MAIN

	POS_ABOVEBASE_CONSONANT

	POS_BEFORE_SUBJOINED
	POS_BELOWBASE_CONSONANT
	POS_AFTER_SUBJOINED

	POS_BEFORE_POST
	POS_POSTBASE_CONSONANT
	POS_AFTER_POST

	POS_FINAL_CONSONANT
	POS_SMVD


This sort order enumerates all of the possible final positions to
which a codepoint might be reordered, across all of the Indic
scripts. It includes some ordering categories not utilized in
Gujarati. 

The basic positions (left to right) are <samp>"Reph"</samp>
(`POS_RA_TO_BECOME_REPH`), dependent vowels (matras) and consonants
positioned before the base consonant or syllable base
(`POS_PREBASE_MATRA` and `POS_PREBASE_CONSONANT`), the base consonant
or syllable base (`POS_SYLLABLE_BASE`), above-base consonants
(`POS_ABOVEBASE_CONSONANT`), below-base consonants
(`POS_BELOWBASE_CONSONANT`), consonants positioned after the base
consonant or syllable base (`POS_POSTBASE_CONSONANT`), syllable-final
consonants (`POS_FINAL_CONSONANT`), and syllable-modifying or Vedic
signs (`POS_SMVD`).

In addition, several secondary positions are defined to handle various
reordering rules that deal with relative, rather than absolute,
positioning. `POS_AFTER_MAIN` means that a character must be
positioned immediately after the syllable base. `POS_BEFORE_SUBJOINED`
and `POS_AFTER_SUBJOINED` mean that a character must be positioned
before or after any below-base consonants, respectively. Similarly,
`POS_BEFORE_POST` and `POS_AFTER_POST` mean that a character must be
positioned before or after any post-base consonants, respectively. 

For shaping-engine implementers, the names used for the ordering
categories matter only in that they are unambiguous. 

For a definition of the "base" consonant, refer to stage 2, step 1, which
follows.

#### Stage 2, step 1: Base consonant ####

The first step is to determine the base consonant of the syllable, if
there is one, and tag it as `POS_SYLLABLE_BASE`.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base, and it should be tagged
as `POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a standalone sequence or other syllable that begins with a placeholder
or dotted circle, the placeholder or dotted circle will always serve
as the syllable base, and it should be tagged as
`POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a syllable that begins with a consonant, the shaping engine must
determine the base consonant by a script-specific algorithm.

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

The base consonant is defined as the consonant in a consonant-based
syllable that carries the syllable's vowel sound. That vowel sound
will either be provided by the script's inherent vowel (in which case
it is not written with a separate character) or the sound will be designated
by the addition of a dependent-vowel (matra) sign.

Vowel-based syllables, standalone sequences, and broken text runs will
not have base consonants.

<!--- > Because vowel-based syllables will not include consonants and
> because independent vowels do not take on special forms or require
> reordering, many of the steps that follow will involve no
> work for a vowel-based syllable. However, vowel-based syllables must
> still be sorted and their marks handled correctly, and <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr>
> lookups must be applied. These steps of the shaping process follow
> the same rules that are employed for consonant-based syllables.
--->

While performing the base-consonant search, shaping engines may
also encounter special-form consonants, including below-base
consonants and post-base consonants. Each of these special-form
consonants must also be tagged (`POS_BELOWBASE_CONSONANT`,
`POS_POSTBASE_CONSONANT`, respectively). 

Any pre-base-reordering consonant (such as a pre-base-reordering <samp>"Ra"</samp>)
encountered during the base-consonant search must be tagged
`POS_POSTBASE_CONSONANT`. 
 
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.


The algorithm for determining the base consonant is:

  - If the syllable starts with <samp>"Ra,Halant"</samp> and the syllable contains
    more than one consonant, exclude the starting <samp>"Ra"</samp> from the list of
    consonants to be considered. 
  - Starting from the end of the syllable, move backwards until a consonant is found.
      * If the consonant is the first consonant, stop.
      * If the consonant is preceded by the sequence <samp>"Halant,ZWJ"</samp>, stop.
      * If the consonant has a below-base form, tag it as
        `POS_BELOWBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant has a post-base form, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant is a pre-base-reordering <samp>"Ra"</samp>, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If none of the above conditions is true, stop.
  - The consonant stopped at will be the base consonant.

> Note: The algorithm is designed to work for all Indic
> scripts. However, Gujarati does not utilize pre-base-reordering <samp>"Ra"</samp>.

Gujarati includes one below-base consonant form.

  - <samp>"Halant,Ra"</samp> (occurring after the syllable base) and <samp>"Ra,Halant"</samp>
    (before the syllable base, but in a non-syllable-initial
    position) will take on the <samp>"Rakaar"</samp> form. 
	
> Note: Because Gujarati employs the `BLWF_MODE_PRE_AND_POST` shaping
> characteristic, consonants with below-base special forms may occur
> before or after the syllable base. 
> 
> During the base-consonant search, only the <samp>"Halant,_consonant_"</samp> 
> pattern following the syllable base for these below-base forms will
> be encountered. Stage 2, step 5 below ensures that the <samp>"_consonant_,Halant"</samp>
> pattern preceding the syllable base for these below-base forms will
> also be tagged correctly.


#### Stage 2, step 2: Matra decomposition ####

Second, any multi-part dependent vowels (matras) must be decomposed
into their components. Gujarati has one multi-part dependent vowel,
"Candra O" (`U+0AC9`).

> "Candra O" (`U+0AC9`) decomposes to "`U+0AC5`,`U+0ABE`"

> Note: "Candra O" is categorized in Unicode as being a top-and-right
> matra, a combination that would normally decompose into one
> TOP_POSITION mark and one RIGHT_POSITION mark. In "Candra O",
> however, the `U+0AC5` component is intended to be positioned over the
> `U+0ABE` component, not above the base.
>
> Consequently, the two decomposed components should both be tagged
> for the `POS_AFTER_POST` sorting position, and neither will need to
> be reordered.
>
> In addition, the decomposition is not canonical in
> Unicode, so performing the decomposition may trigger unknown
> behavior from other components of the software stack. Consequently,
> shaping engines may choose to skip it.

Because this decomposition is a character-level operation, the shaping
engine may choose to perform it earlier, such as during an initial
Unicode-normalization stage. However, all such decompositions must be
completed before the shaping engine begins step three, below.

:::{figure-md}
![Two-part matra decomposition](/images/gujarati/gujarati-matra-decompose.svg "Two-part matra decomposition"){.shaping-demo .inline-svg .greyscale-svg #gujarati-matra-decompose}

Two-part matra decomposition
:::

```{svg-color-toggle-button} gujarati-matra-decompose
```


#### Stage 2, step 3: Tag matras ####

Third, all left-side dependent-vowel (matra) signs must be tagged to be
moved to the beginning of the syllable, with `POS_PREBASE_MATRA`.

Above-base dependent-vowel (matra) signs must be tagged with `POS_AFTER_SUBJOINED`.

Right-side dependent-vowel (matra) signs must be tagged with `POS_AFTER_POST`.

Below-base dependent-vowel (matra) signs must be tagged with `POS_AFTER_POST`.

#### Stage 2, step 4: Adjacent marks ####

Fourth, any subsequences of marks that include a <samp>"Nukta"</samp> and a
<samp>"Halant"</samp> or Vedic sign must be reordered so that the <samp>"Nukta"</samp> appears
first.

This means that the subsequence <samp>"Halant,Nukta"</samp> is reordered to
<samp>"Nukta,Halant"</samp> and that the subsequence <samp>"_Vedic_sign_,Nukta"</samp> is
reordered to <samp>"Nukta,_Vedic_sign_"</samp>.

For subsequences of affected marks that are longer than two, the
reordering operation must be repeated until the <samp>"Nukta"</samp> is the first
character in the subsequence. No other marks in the subsequence
should be reordered.

This order is canonical in Unicode and is required so that
<samp>"_consonant_,Nukta"</samp> substitution rules from <abbr title="Glyph Substitution table">GSUB</abbr> will be correctly
matched later in the shaping process.

#### Stage 2, step 5: Pre-base consonants ####

Fifth, consonants that occur before the syllable base must be tagged
with `POS_PREBASE_CONSONANT`. Excluding initial <samp>"Ra,Halant"</samp> sequences
that will become <samp>"Reph"</samp>s: 

  - If the consonant has a below-base form, tag it as
          `POS_BELOWBASE_CONSONANT`. 
  - Otherwise, tag it as `POS_PREBASE_CONSONANT`.
  
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.

Gujarati includes one below-base consonant form.

  - <samp>"Halant,Ra"</samp> (occurring after the syllable base) and <samp>"Ra,Halant"</samp>
    (before the syllable base, but in a non-syllable-initial
    position) will take on the <samp>"Rakaar"</samp> form. 
	
> Note: Because Gujarati employs the `BLWF_MODE_PRE_AND_POST` shaping
> characteristic, consonants with below-base special forms may occur
> before or after the syllable base. 
> 
> During the base-consonant search in stage 2, step 1, any instances of the
> <samp>"Halant,_consonant_"</samp>  pattern following the syllable base for these
> below-base forms will be encountered. The tagging in this step
> ensures that the <samp>"_consonant_,Halant"</samp> pattern preceding the syllable
> base for these below-base forms will also be tagged correctly.


#### Stage 2, step 6: Reph ####

Sixth, initial <samp>"Ra,Halant"</samp> sequences that will become <samp>"Reph"</samp>s must be tagged with
`POS_RA_TO_BECOME_REPH`.

> Note: an initial <samp>"Ra,Halant"</samp> sequence will always become a <samp>"Reph"</samp>
> unless the <samp>"Ra"</samp> is the only consonant in the syllable.

#### Stage 2, step 7: Final consonants ####

Seventh, all final consonants must be tagged. Consonants that occur
after the base consonant or syllable base _and_ after a dependent vowel (matra) sign
must be tagged with  `POS_FINAL_CONSONANT`.

> Note: Final consonants occur only in Sinhala and should not be
> expected in `<gjr2>` text runs. This step is included here to
> maintain compatibility across Indic scripts.

#### Stage 2, step 8: Mark tagging ####

Eighth, all marks must be tagged. 

> Note: In this step, joiner and non-joiner characters must also be
> tagged according to the same rules given for marks, even though
> these characters are not categorized as marks in Unicode.

Marks in the `BINDU`, `VISARGA`, `AVAGRAHA`, `CANTILLATION`,
`SYLLABLE_MODIFIER`, `GEMINATION_MARK`, and `SYMBOL` categories should
be tagged with `POS_SMVD`. 

All <samp>"Nukta"</samp>s must be tagged with the same positioning tag as the
preceding consonant, independent vowel, placeholder, or dotted circle.

All remaining marks (not in the `POS_SMVD` category and not <samp>"Nukta"</samp>s)
must be tagged with the same positioning tag as the closest non-mark
character the mark has affinity with, so that they move together 
during the sorting step.

There are two possible cases: those marks before the syllable base
and those marks after the syllable base. In addition, an exception is
made for <samp>"Halant"</samp> marks that follow a left-side (pre-base) matra.

  1. Initially, all remaining marks should be tagged with the same
	 positioning tag as the closest preceding consonant.

  2. For each consonant after the syllable base (such as post-base
	 consonants, below-base consonants, or final consonants), all
	 remaining marks located between that current consonant and any
	 previous consonant should be tagged with the same positioning tag as
	 the current (later) consonant.
  
     In other words, all consonants preceding the syllable base "own" the
	 marks that follow them, while all consonants after the syllable base
	 "own" the marks that come before them. When a syllable does not have
	 any consonants after the syllable base, the syllable base should
	 "own" all the marks that follow it.
  
  3. Finally, <samp>"Halant"</samp> marks that follow a left-side dependent vowel
     (matra) should _not_ be tagged with the left-side matra's
     positioning tag. Instead, the <samp>"Halant"</samp> should be tagged with the
     positioning tag of the non-mark character preceding the left-side
     matra. This prevents the <samp>"Halant"</samp> mark from being moved with the
     left-side matra when the syllable is sorted.


<!--- HarfBuzz also tags everything between a post-base consonant or -->
<!-- matra and another post-base consonant as belonging to the latter -->
<!--post-base consonant. --->


#### Stage 2, step 9: Sort syllable ####

With these steps completed, the syllable can be sorted into the final
sort order as listed at the beginning of stage 2.

The glyphs in the syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.


#### Stage 2, step 10: Flag sequences for possible feature applications ####

With the initial reordering complete, those glyphs in the syllable that
may have <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features applied in stages 3, 5, and 6 should be
flagged for each potential feature. 

This flagging is preliminary; the set of potential features varies
between different scripts and which features are supported varies
between fonts. It is also possible that the application of
one feature on a glyph sequence will perform a substitution that makes
a later feature no longer applicable to the updated sequence.

Consequently, the flagging must be completed before shaping proceeds
to the stages during which features are applied.

Some shaping features, such as `locl`, can potentially apply to any
glyphs. Therefore it is not necessary to maintain a separate flag for
these features in the bitmask (or other data structure) used to track
the flags -- although shaping engines may do so if desired.

The sequences to flag are summarized in the list below; a full
description of each feature's function and interpretation is provided
in the <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> application stages that follow.

  - `nukt` should match <samp>"_Consonant_,Nukta"</samp> sequences
  - `akhn` should match <samp>"Ka,Halant,Ssa"</samp> and <samp>"Ja,Halant,Nya"</samp>
  - `rphf` should match initial <samp>"Ra,Halant"</samp> sequences but _not_ match
            initial <samp>"Ra,Halant,ZWJ"</samp> sequences
  - `rkrf` should match <samp>"_Consonant_,Halant,Ra"</samp> sequences
  - `blwf` should match <samp>"Halant,Ra"</samp> in post-base positions and
           <samp>"Ra,Halant"</samp> in non-initial pre-base positions 
  - `half` should match <samp>"_Consonant_,Halant"</samp> in pre-base position but
           _not_ match <samp>"Ra,Halant"</samp> sequences flagged for `rphf` and
           _not_ match <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequences
  - `vatu` should match <samp>"_Consonant_,Halant,Ra"</samp> sequences
  - `cjct` should match <samp>"_Consonant_,Halant,_Consonant_"</samp> but _not_
            match <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
            <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp>


### Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should be flagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2, step 10.

The order in which these substitutions must be performed is fixed for
all Indic scripts:

	locl
	nukt
	akhn
	rphf 
	rkrf 
	pref (not used in Gujarati)
	blwf 
	abvf (not used in Gujarati)
	half
	pstf (not used in Gujarati)
	vatu
	cjct
	cfar (not used in Gujarati)

#### Stage 3, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

#### Stage 3, step 2: nukt ####

The `nukt` feature replaces <samp>"_Consonant_,Nukta"</samp> sequences with a
precomposed nukta-variant of the consonant glyph.

  - The context defined for a `nukt` feature is:

:::{table} `nukt` feature context

| Backtrack     | Matching sequence             | Lookahead     |
|:--------------|:------------------------------|:--------------|
| _none_        | `_consonant_`(full),`_nukta_` | _none_        |
:::

:::{figure-md}
![nukt feature application](/images/gujarati/gujarati-nukt.svg "nukt feature application"){.shaping-demo .inline-svg .greyscale-svg #gujarati-nukt}

nukt feature application
:::

```{svg-color-toggle-button} gujarati-nukt
```


#### Stage 3, step 3: akhn ####

The `akhn` feature replaces two specific sequences with required ligatures. 

  - <samp>"Ka,Halant,Ssa"</samp> is substituted with the <samp>"KSsa"</samp> ligature. 
  - <samp>"Ja,Halant,Nya"</samp> is substituted with the <samp>"JNya"</samp> ligature. 
  
These sequences can occur anywhere in a syllable. The <samp>"KSsa"</samp> and
<samp>"JNya"</samp> characters have orthographic status equivalent to full
consonants in some languages, and fonts may have `cjct` substitution
rules designed to match them in subsequences. Therefore, this
feature must be applied before all other many-to-one substitutions.

  - The context defined for an `akhn` feature is:

:::{table} `akhn` feature context
    
| Backtrack     | Matching sequence           | Lookahead     |
|:--------------|:----------------------------|:--------------|
| _none_        | `AKHAND_CONSONANT_SEQUENCE` | _none_        |
:::


:::{figure-md}
![akhn KSsa formation](/images/gujarati/gujarati-akhn-kssa.svg "akhn KSsa formation"){.shaping-demo .inline-svg .greyscale-svg #gujarati-akhn-kssa}

akhn KSsa formation
:::

```{svg-color-toggle-button} gujarati-akhn-kssa
```


:::{figure-md}
![akhn JNya formation](/images/gujarati/gujarati-akhn-jnya.svg "akhn JNya formation"){.shaping-demo .inline-svg .greyscale-svg #gujarati-akhn-jnya}

akhn JNya formation
:::

```{svg-color-toggle-button} gujarati-akhn-jnya
```


#### Stage 3, step 4: rphf ####

The `rphf` feature replaces initial <samp>"Ra,Halant"</samp> sequences with the
<samp>"Reph"</samp> glyph.

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence, however, must not be flagged for
    the `rphf` substitution.
	

  - The context defined for a `rphf` feature is:

:::{table} `rphf` feature context
    
| Backtrack        | Matching sequence       | Lookahead     |
|:-----------------|:------------------------|:--------------|
| `SYLLABLE_START` | "Ra"(full),`_halant_`   | _none_        |
:::


:::{figure-md}
![Reph formation](/images/gujarati/gujarati-rphf.svg "Reph formation"){.shaping-demo .inline-svg .greyscale-svg #gujarati-rphf}

Reph formation
:::

```{svg-color-toggle-button} gujarati-rphf
```

	
#### Stage 3, step 5: rkrf ####

The `rkrf` feature replaces <samp>"_Consonant_,Halant,Ra"</samp> sequences with the
<samp>"Rakaar"</samp>-ligature form of the consonant glyph.

  - The context defined for a `rkrf` feature is:

:::{table} `rkrf` feature context
 
| Backtrack           | Matching sequence     | Lookahead     |
|:--------------------|:----------------------|:--------------|
| `_consonant_`(full) | `_halant_`,"Ra"(full) | _none_        |
:::


:::{figure-md}
![Rakaar ligation](/images/gujarati/gujarati-rkrf.svg "Rakaar ligation"){.shaping-demo .inline-svg .greyscale-svg #gujarati-rkrf}

Rakaar ligation
:::

```{svg-color-toggle-button} gujarati-rkrf
```


#### Stage 3, step 6: pref ####

> This feature is not used in Gujarati.

<!--- 3.5: The `pref` feature replaces pre-base-consonant glyphs with -->
<!--any special forms. --->

#### Stage 3, step 7: blwf ####

The `blwf` feature replaces below-base-consonant glyphs with any
special forms. Gujarati includes one below-base consonant
form:

  - <samp>"Halant,Ra"</samp> (occurring after the syllable base) or <samp>"Ra,Halant"</samp>
    (before the syllable base, but in a non-syllable-initial position) will
    take on the <samp>"Rakaar"</samp> form.
	
If the active font contains ligatures for the consonant adjacent to
the <samp>"Halant"</samp> (i.e., <samp>"_Consonant_,Halant,Ra"</samp>), then that ligature is
normally applied with the `rkrf` feature in stage 3, step 5. The `blwf`
feature allows the <samp>"Ra"</samp> to be substituted with a standalone <samp>"Rakaar"</samp>
mark, to work with all consonants that do not have a `rkrf` ligature
in the font.

Because Gujarati incorporates the `BLWF_MODE_PRE_AND_POST` shaping
characteristic, any pre-base consonants and any post-base consonants
may potentially match a `blwf` substitution; therefore, both cases must
be flagged for comparison. Note that this is not necessarily the case in other
Indic scripts that use a different `BLWF_MODE_` shaping
characteristic. 

:::{figure-md}
![blwf feature application](/images/gujarati/gujarati-blwf.svg "blwf feature application"){.shaping-demo .inline-svg .greyscale-svg #gujarati-blwf}

blwf feature application
:::

```{svg-color-toggle-button} gujarati-blwf
```


#### Stage 3, step 8: abvf ####

> This feature is not used in Gujarati.

#### Stage 3, step 9: half ####

The `half` feature replaces <samp>"_Consonant_,Halant"</samp> sequences before the
base consonant or syllable base with "half forms" of the consonant
glyphs.

In the most common case, this substitution applies to
<samp>"_Consonant_,Halant"</samp> sequences that are followed by another
<samp>"_Consonant_"</samp>.

In addition, a sequence matching <samp>"_Consonant_,Halant,ZWJ"</samp> must also be
flagged for potential `half` substitutions.

> Note: The presence of the <samp>"ZWJ"</samp> at the end of the sequence means
> that the sequence may match the regular-expression test in stage 1
> as the end of a syllable, even without being followed by a base
> consonant or syllable base.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.

There are three exceptions to the default behavior, for which
the shaping engine must test:

  - Initial <samp>"Ra,Halant"</samp> sequences, which should have been flagged for
    the `rphf` feature earlier, must not be flagged for potential
    `half` substitutions.

  - Non-initial <samp>"Ra,Halant"</samp> sequences, which should have been flagged
    for the `rkrf` or `blwf` features earlier, must not be flagged for
    potential `half` substitutions.

  - A sequence matching <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be
    flagged for potential `half` substitutions.

:::{figure-md}
![half-form feature application](/images/gujarati/gujarati-half.svg "half-form feature application"){.shaping-demo .inline-svg .greyscale-svg #gujarati-half}

half-form feature application
:::

```{svg-color-toggle-button} gujarati-half
```


#### Stage 3, step 10: pstf ####

> This feature is not used in Gujarati.


#### Stage 3, step 11: vatu ####

The `vatu` feature replaces certain sequences with "Vattu variant"
forms. 

"Vattu variants" are formed from glyphs followed by <samp>"Rakaar"</samp>
(the below-base form of <samp>"Ra"</samp>); therefore, this feature must be applied after
the `blwf` feature.

> Note: for Gujarati, the `vatu` feature performs the same set of
> substitutions as the `rkrf` feature. The `rkrf` feature is
> preferred; if a given font implements `rkrf`, it does not
> necessarily have to implement `vatu`. Nevertheless, shaping engines
> must support and process both features.

:::{figure-md}
![vatu feature application](/images/gujarati/gujarati-vatu.svg "vatu feature application"){.shaping-demo .inline-svg .greyscale-svg #gujarati-vatu}

vatu feature application
:::

```{svg-color-toggle-button} gujarati-vatu
```


#### Stage 3, step 12: cjct ####

The `cjct` feature replaces sequences of adjacent consonants with
conjunct ligatures. These sequences must match <samp>"_Consonant_,Halant,_Consonant_"</samp>.

A sequence matching <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
<samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be flagged to form a conjunct.

> Note: The presence of the <samp>"ZWJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> sequence should automatically
> inhibit any `cjct` feature rules from matching the sequence as valid
> input, and thus prevent the `cjct` substitution from being applied.

> Note: The presence of the <samp>"ZWNJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequence means that the
> <samp>"_Consonant_,Halant,ZWNJ"</samp> subsequence will match the
> regular-expression test in stage 1 as the end of a syllable.
> 
> Because OpenType shaping features in `<gjr2>` are defined as
> applying only within an individual syllable, this means that the
> presence of the <samp>"ZWNJ"</samp> will automatically prevent the application of
> a `cjct` feature by triggering the identification of a syllable
> break between the two consonants.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWNJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.
>
> Note, also: The presence of the <samp>"ZWJ"</samp> means that a
> <samp>"_Consonant_,Halant,ZWJ"</samp> sequence may match the regular-expression
> test in stage 1 as the end of a syllable, even without being
> followed by a base consonant or syllable base. By definition,
> however, a <samp>"_Consonant_,Halant,ZWJ"</samp> syllable identified in stage 1
> cannot also include a <samp>"_Consonant_"</samp> after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.

The font's <abbr title="Glyph Substitution table">GSUB</abbr> rules might be implemented so that `cjct`
substitutions apply to half-form consonants; therefore, this feature
must be applied after the `half` feature. 

:::{figure-md}
![cjct feature application](/images/gujarati/gujarati-cjct.svg "cjct feature application"){.shaping-demo .inline-svg .greyscale-svg #gujarati-cjct}

cjct feature application
:::

```{svg-color-toggle-button} gujarati-cjct
```


#### Stage 3, step 13: cfar ####

> This feature is not used in Gujarati.


### Stage 4: Final reordering ###

The final reordering stage repositions marks, dependent-vowel (matra)
signs, and <samp>"Reph"</samp> glyphs to the appropriate location with respect to
the base consonant or syllable base. Because multiple substitutions
may have occurred during the application of the basic-shaping features
in the preceding stage, these repositioning moves could not be
performed during the initial reordering stage.

Like the initial reordering stage, the steps involved in this stage
occur on a per-syllable basis.

<!--- Check that classifications have not been mangled. If the -->
<!--character is a Halant AND a ligature was formed AND a multiple
substitution was performed, restore the classification to VIRAMA
because it was almost certainly lost in the preceding <abbr title="Glyph Substitution table">GSUB</abbr> stage.
--->

#### Stage 4, step 1: Base consonant ####

The final reordering stage, like the initial reordering stage, begins
with determining the syllable base of each syllable, following the
same algorithm used in stage 2, step 1.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base. In a standalone sequence or
other syllable that begins with a placeholder or a dotted circle, the
placeholder or dotted circle will always serve as the syllable base.

In a syllable that begins with a consonant, the shaping engine must
repeat the base-consonant search algorithm used in stage 2, step 1.

The codepoint of the underlying base consonant or syllable base will
not change between the search performed in stage 2, step 1, and the
search repeated here. However, the application of <abbr title="Glyph Substitution table">GSUB</abbr> shaping
features in stage 3 means that several ligation and many-to-one
substitutions may have taken place. The final glyph produced by that
process may, therefore, be a conjunct or ligature form — in most
cases, such a glyph will not have an assigned Unicode codepoint.
   
#### Stage 4, step 2: Pre-base matras ####

Pre-base dependent vowels (matras) that were reordered during the
initial reordering stage must be moved to their final position. This
position is defined as:
   
   - after the last standalone <samp>"Halant"</samp> glyph that comes after the
     matra's starting position and also comes before the main
     consonant.
   - If a zero-width joiner follows this last standalone <samp>"Halant"</samp>, the
     final matra position is moved to after the joiner.

This means that the matra will move to the right of all explicit
<samp>"consonant,Halant"</samp> subsequences, but will stop to the left of the base
consonant or syllable base, all conjuncts or ligatures that contain
the base consonant or syllable base, and all half forms.

:::{figure-md}
![Pre-base matra positioning](/images/gujarati/gujarati-matra-position.svg "Pre-base matra positioning"){.shaping-demo .inline-svg .greyscale-svg #gujarati-matra-position}

Pre-base matra positioning
:::

```{svg-color-toggle-button} gujarati-matra-position
```


> Note: OpenType and Unicode both state that if the syllable includes
> a <abbr title="Zero-Width Joiner">ZWJ</abbr> immediately after the last <samp>"Halant"</samp>, then the final matra
> position should be after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.
>
> However, there are several test sequences indicating that
> Microsoft's Uniscribe shaping engine did not follow this rule (in,
> at least, Devanagari and Bengali text), and in these circumstances
> Uniscribe instead makes the final matra position before the final
> <samp>"Consonant,Halant,ZWJ"</samp>.
>
> Subsequently, the HarfBuzz shaping engine has also followed the same
> pattern. If other shaping engine implementations prefer to maintain
> maximum compatibility with Uniscribe and HarfBuzz, then they should
> also follow suit.

> Note: The Microsoft script-development specifications for OpenType
> shaping also state that if a zero-width non-joiner follows the last
> standalone <samp>"Halant"</samp>, the final matra position is moved to after the
> non-joiner. However, it is unnecessary to test for this condition,
> because a <samp>"Halant,ZWNJ"</samp> subsequence is, by definition, the end of a
> syllable. Consequently, a <samp>"Halant,ZWNJ"</samp> cannot be followed by a
> pre-base dependent vowel.


#### Stage 4, step 3: Reph ####

<samp>"Reph"</samp> must be moved from the beginning of the syllable to its final
position. Because Gujarati incorporates the `REPH_POS_BEFORE_POST`
shaping characteristic, this final position is immediately before any
independent post-base consonant forms (meaning the first post-base
consonant that has not formed a ligature with the syllable base).

The algorithm for finding the final <samp>"Reph"</samp> position is:

<!---

  - Find the first explicit <samp>"Halant"</samp> between the first post-Reph
    consonant and the last main consonant. Move the <samp>"Reph"</samp> to the
    position immediately after this <samp>"Halant"</samp>.
	- If a zero-width joiner (<abbr>ZWJ</abbr>) or a zero-width non-joiner (<abbr>ZWNJ</abbr>)
      follows this <samp>"Halant"</samp>, move the <samp>"Reph"</samp> to the position
      immediately after the <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.
--->

  - Starting at the first post-<samp>"Reph"</samp> consonant, search forward looking
    for the first explicit <samp>"Halant"</samp>, ending the search when the base
    consonant is encountered. If such an explicit <samp>"Halant"</samp> is found,
    move the <samp>"Reph"</samp> to the position immediately after this
    <samp>"Halant"</samp>.
	  * If a zero-width joiner (<abbr>ZWJ</abbr>) or a zero-width non-joiner (<abbr>ZWNJ</abbr>)
        follows this <samp>"Halant"</samp>, move the <samp>"Reph"</samp> to the position
        immediately after the <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>. This will be the final
        <samp>"Reph"</samp> position. 
	  * If no <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> follows this <samp>"Halant"</samp>, leave the <samp>"Reph"</samp> in
        its position immediately after the <samp>"Halant"</samp>. This will be the
        final <samp>"Reph"</samp> position. 
  - If no such explicit <samp>"Halant"</samp> is found in the previous step, find
    the first post-base consonant that has not formed a ligature with
    the base consonant. If such a non-ligated post-base consonant is
    found, move the <samp>"Reph"</samp> to the position immediately before the
    non-ligated post-base consonant. This will be the final <samp>"Reph"</samp>
    position.
  - If no such non-ligated post-base consonant is found in the
    previous step, move the <samp>"Reph"</samp> to the position immediately before
    the first post-base matra, syllable modifier, or Vedic sign that
    has a positioning tag after the script's <samp>"Reph"</samp> position in the
    syllable sort order (as listed in [stage
    2](#stage-2-initial-reordering)). This will be the final <samp>"Reph"</samp>
    position. 
	> Note: Because Gujarati incorporates the
    > `REPH_POS_BEFORE_POST` shaping characteristic, this means
    > any positioning tag of `POS_POSTBASE_CONSONANT` or later,
    > although a post-base matra, syllable modifier, or Vedic sign
    > would not typically be tagged with `POS_POSTBASE_CONSONANT`.
  - If no other location has been located in the previous steps, move
    the <samp>"Reph"</samp> to the end of the syllable.


Finally, if the final position of <samp>"Reph"</samp> occurs after a
<samp>"_matra_,Halant"</samp> subsequence, then <samp>"Reph"</samp> must be repositioned to the
left of <samp>"Halant"</samp>, to allow for potential matching with `abvs` or
`psts` substitutions from <abbr title="Glyph Substitution table">GSUB</abbr>.


:::{figure-md}
![Reph positioning](/images/gujarati/gujarati-reph-position.svg "Reph positioning"){.shaping-demo .inline-svg .greyscale-svg #gujarati-reph-position}

Reph positioning
:::

```{svg-color-toggle-button} gujarati-reph-position
```


#### Stage 4, step 4: Pre-base-reordering consonants ####

Any pre-base-reordering consonants must be moved to immediately before
the base consonant or syllable base.
  
Gujarati does not use pre-base-reordering consonants, so this step will
involve no work when processing `<gjr2>` text. It is included here in order
to maintain compatibility with the other Indic scripts.
  
#### Stage 4, step 5: Initial matras ####

Any left-side dependent vowels (matras) that are at the start of a
word must be flagged for potential substitution by the `init` feature
of <abbr title="Glyph Substitution table">GSUB</abbr>.

Gujarati does not use the `init` feature, so this step will
involve no work when processing `<gjr2>` text. It is included here in
order to maintain compatibility with the other Indic scripts.

   
### Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr> ###

In this stage, the remaining substitution features from the <abbr title="Glyph Substitution table">GSUB</abbr> table
are applied. In preparation for this stage, glyph sequences should be
flagged for possible application of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2,
step 10.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	init (not used in Gujarati)
	pres
	abvs
	blws
	psts
	haln

The `init` feature is not used in Gujarati.

The `pres` feature replaces pre-base-consonant glyphs with special
presentation forms. This can include consonant conjuncts, half-form
consonants, and stylistic variants of left-side dependent vowels
(matras). 

:::{figure-md}
![pres feature application](/images/gujarati/gujarati-pres.svg "pres feature application"){.shaping-demo .inline-svg .greyscale-svg #gujarati-pres}

pres feature application
:::

```{svg-color-toggle-button} gujarati-pres
```


The `abvs` feature replaces above-base-consonant glyphs with special
presentation forms. This usually includes contextual variants of
above-base marks or contextually appropriate mark-and-base ligatures.

:::{figure-md}
![abvs feature application](/images/gujarati/gujarati-abvs.svg "abvs feature application"){.shaping-demo .inline-svg .greyscale-svg #gujarati-abvs}

abvs feature application
:::

```{svg-color-toggle-button} gujarati-abvs
```


The `blws` feature replaces below-base-consonant glyphs with special
presentation forms. This usually includes replacing base consonants or
syllable bases that
are adjacent to the below-base-consonant form <samp>"Rakaar"</samp> with contextual
ligatures.

:::{figure-md}
![blws feature application](/images/gujarati/gujarati-blws.svg "blws feature application"){.shaping-demo .inline-svg .greyscale-svg #gujarati-blws}

blws feature application
:::

```{svg-color-toggle-button} gujarati-blws
```


The `psts` feature replaces post-base-consonant glyphs with special
presentation forms. This usually includes replacing right-side
dependent vowels (matras) with stylistic variants or replacing
post-base-consonant/matra pairs with contextual ligatures. 

:::{figure-md}
![psts feature application](/images/gujarati/gujarati-psts.svg "psts feature application"){.shaping-demo .inline-svg .greyscale-svg #gujarati-psts}

psts feature application
:::

```{svg-color-toggle-button} gujarati-psts
```


The `haln` feature replaces syllable-final <samp>"_Consonant_,Halant"</samp> pairs with
special presentation forms. This can include stylistic variants of the
consonant where placing the <samp>"Halant"</samp> mark on its own is
typographically problematic. 

:::{figure-md}
![haln feature application](/images/gujarati/gujarati-haln.svg "haln feature application"){.shaping-demo .inline-svg .greyscale-svg #gujarati-haln}

haln feature application
:::

```{svg-color-toggle-button} gujarati-haln
```


> Note: The `calt` feature, which allows for generalized application
> of contextual alternate substitutions, is usually applied at this
> point. However, `calt` is not mandatory for correct Gujarati shaping
> and may be disabled in the application by user preference.

### Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied.

As with the preceding stage, the order in which these features are
applied is not canonical; they should be applied in the order in which
they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

        dist
        abvm
        blwm

> Note: The `kern` feature is usually applied at this stage, if it is
> present in the font. However, `kern` (like `calt`, above) is not
> mandatory for shaping Gujarati text and may be disabled by user preference.

The `dist` feature adjusts the horizontal positioning of
glyphs. Unlike `kern`, adjustments made with `dist` do not require the
application or the user to enable any software _kerning_ features, if
such features are optional. 

:::{figure-md}
![Distance feature application](/images/gujarati/gujarati-dist.svg "Distance feature application"){.shaping-demo .inline-svg .greyscale-svg #gujarati-dist}

Distance feature application
:::

```{svg-color-toggle-button} gujarati-dist
```

The `abvm` feature positions above-base marks for attachment to base
characters. In Gujarati, this includes <samp>"Reph"</samp> in addition to
above-base dependent vowels (matras), diacritical marks, and Vedic signs. 

:::{figure-md}
![Above-base mark positioning](/images/gujarati/gujarati-abvm.svg "Above-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #gujarati-abvm}

Above-base mark positioning
:::

```{svg-color-toggle-button} gujarati-abvm
```


The `blwm` feature positions below-base marks for attachment to base
characters. In Gujarati, this includes below-base dependent vowels
(matras) and diacritical marks as well as the below-base consonant form <samp>"Rakaar"</samp>.

:::{figure-md}
![Below-base mark positioning](/images/gujarati/gujarati-blwm.svg "Below-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #gujarati-blwm}

Below-base mark positioning
:::

```{svg-color-toggle-button} gujarati-blwm
```


## The `<gujr>` shaping model ##

The older Gujarati script tag, `<gujr>`, has been deprecated. However,
shaping engines may still encounter fonts that were built to work with
`<gujr>` and some users may still have documents that were written to
take advantage of `<gujr>` shaping.

### Distinctions from `<gjr2>` ###

The most significant distinction between the shaping models is that the
sequence of <samp>"Halant"</samp> and consonant glyphs (used to trigger shaping
features) was altered when migrating from `<gujr>` to
`<gjr2>`.

Specifically, shaping engines were expected to reorder post-base
<samp>"Halant,_Consonant_"</samp> sequences to <samp>"_Consonant_,Halant"</samp>.

As a result, a font's <abbr title="Glyph Substitution table">GSUB</abbr> substitutions would be written to match
<samp>"_Consonant_,Halant"</samp> sequences in all pre-base and post-base positions.


The `<gujr>` syllable

	Pre-baseC Halant BaseC Halant Post-baseC

would be reordered to

	Pre-baseC Halant BaseC Post-baseC Halant

before features are applied.

In `<gjr2>` text, as described above in this document, there is no
such reordering. The correct sequence to match for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions is
<samp>"_Consonant_,Halant"</samp> for pre-base consonants, but <samp>"Halant,_Consonant_"</samp>
for post-base consonants.

The old Indic shaping model also did not recognize the
`BLWF_MODE_PRE_AND_POST` shaping characteristic. Instead, `<gujr>`
was treated as if it followed the `BLWF_MODE_POST_ONLY`
characteristic. In other words, below-base form substitutions were
only applied to consonants after the base consonant or syllable base.

In addition, for some scripts, left-side dependent vowel marks
(matras) were not repositioned during the final reordering
stage. For `<gujr>` text, the left-side matra was always positioned
at the beginning of the syllable.


### Advice for handling fonts with `<gujr>` features only ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences in order to apply <abbr title="Glyph Substitution table">GSUB</abbr> substitutions when it is known that
the font in use supports only the `<gujr>` shaping model.

### Advice for handling text runs composed in `<gujr>` format ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions or to reorder them to
<samp>"Halant,_Consonant_"</samp> when processing text runs that are tagged with
the `<gujr>` script tag and it is known that the font in use supports
only the `<gjr2>` shaping model.

Shaping engines may also choose to apply `blwf` substitutions to
below-base consonants occurring before the base consonant or syllable base when it is
known that the font in use supports an applicable substitution lookup.

Shaping engines may also choose to position left-side matras according
to the `<gujr>` ordering scheme; however, doing so might interfere
with matching <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features.


================================================
FILE: opentype-shaping-gurmukhi.md
================================================
```{include} /_global.md
```

# Gurmukhi shaping in OpenType #

This document details the shaping procedure needed to display text
runs in the Gurmukhi script.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes and subclasses](#shaping-classes-and-subclasses)
      - [Gurmukhi character tables](#gurmukhi-character-tables)
  - [The `<gur2>` shaping model](#the-gur2-shaping-model)
      - [Stage 1: Identifying syllables and other sequences](#stage-1-identifying-syllables-and-other-sequences)
      - [Stage 2: Initial reordering](#stage-2-initial-reordering)
      - [Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr>](#stage-3-applying-the-basic-substitution-features-from-gsub)
      - [Stage 4: Final reordering](#stage-4-final-reordering)
      - [Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr>](#stage-5-applying-all-remaining-substitution-features-from-gsub)
      - [Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr>](#stage-6-applying-remaining-positioning-features-from-gpos)
  - [The `<guru>` shaping model](#the-guru-shaping-model)
      - [Distinctions from `<gur2>`](#distinctions-from-gur2)
      - [Advice for handling fonts with `<guru>` features only](#advice-for-handling-fonts-with-guru-features-only)
      - [Advice for handling text runs composed in `<guru>` format](#advice-for-handling-text-runs-composed-in-guru-format)


## General information ##

The Gurmukhi script belongs to the Indic family, and follows
the same general patterns as the other Indic scripts. More
specifically, it belongs to the North Indic subgroup, in which
sequences of adjacent consonants are often represented as conjuncts.

The Gurmukhi script is used to write multiple languages, most commonly
Punjabi, Sant Bhasha, and Sindhi. In addition, Sanskrit may be written
in Gurmukhi, so Gurmukhi script runs may include glyphs from the Vedic
Extensions block of Unicode. 

There are two extant Gurmukhi script tags defined in OpenType, `<guru>`
and `<gur2>`. The older script tag, `<guru>`, was deprecated in 2005.
Therefore, new fonts should be engineered to work with the `<gur2>`
shaping model. However, if a font is encountered that supports only
`<guru>`, the shaping engine should deal with it gracefully.

## Terminology ##

OpenType shaping uses a standard set of terms for Indic scripts.  The
terms used colloquially in any particular language may vary, however,
potentially causing confusion.

**Matra** is the standard term for a dependent vowel sign.

The term "matra" is also used to refer to the headline above most
Gurmukhi letters. To avoid ambiguity, the term **headline** is
used in most Unicode and OpenType shaping documents.

**Halant** and **Virama** are both standard terms for the below-base "vowel-killer"
sign. Unicode documents use the term "virama" most frequently, while
OpenType documents use the term "halant" most frequently.

**Chandrabindu** (or simply **Bindu**) is the standard term for the diacritical mark
indicating that the preceding vowel should be nasalized. In the Punjabi
language, this mark is known as the _adak bindi_.

The term **base consonant** is also critical to Indic shaping. The
base consonant of a syllable is the consonant that carries the
syllable's vowel sound, either the inherent vowel (for an unmarked
base consonant) or a dependent vowel (with the addition of a matra).

A syllable's base consonant is generally rendered in its full form
(although it may form ligatures), while other consonants in the
syllable frequently take on secondary forms. Different <abbr title="Glyph Substitution table">GSUB</abbr>
substitutions may apply to a script's **pre-base** and **post-base**
consonants. Some of these substitutions create **above-base** or
**below-base** forms. The **Reph** form of the consonant "Ra" is an
example.

Syllables may also begin with an **independent vowel** instead of a
consonant. In these syllables, the independent vowel is rendered in
full-letter form, not as a matra, and the independent vowel serves as the
syllable base, similar to a base consonant.

Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.

## Glyph classification ##

Shaping Gurmukhi text depends on the shaping engine correctly
classifying each glyph in the run. As with most other scripts, the
classifications must distinguish between consonants, vowels
(independent and dependent), numerals, punctuation, and various types
of diacritical mark. 

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not sufficient to fully capture the
expected shaping behavior (such as glyph reordering). Therefore,
Gurmukhi glyphs must additionally be classified by how they are treated
when shaping a run of text.

### Shaping classes and subclasses ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Indic scripts. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Several of the diacritic and syllable-modifying marks behave according
to their own rules and, thus, have a special class. These include
`BINDU`, `VISARGA`, `AVAGRAHA`, `NUKTA`, and `VIRAMA`. Some
less-common marks behave according to rules that are similar to these
common marks, and are therefore classified with the corresponding
common mark. The Vedic Extensions also include a `CANTILLATION`
class for tone marks.

Letters generally fall into the classes `CONSONANT`,
`VOWEL_INDEPENDENT`, and `VOWEL_DEPENDENT`. These classes help the
shaping engine parse and identify key positions in a syllable. For
example, Unicode categorizes dependent vowels as `Mark [Mn]`, but the
shaping engine must be able to distinguish between dependent vowels
and diacritical marks (which are also categorized as `Mark [Mn]`).

Gurmukhi uses one subclass of consonant, `CONSONANT_MEDIAL`.

> Note: Unicode includes a second subclass of consonant,
> `CONSONANT_PLACEHOLDER`, for two special vowel-carrier letters,
> "Iri" (`U+0A72`) and "Ura" (`U+0A73`). For shaping purposes, however,
> both <samp>"Iri"</samp> and <samp>"Ura"</samp> are classified as `CONSONANT`.

The `CONSONANT_MEDIAL` subclass is used for <samp>"Yakash"</samp> (`U+0A75`), a
consonant used in Sikh religious texts that is believed to be derived
from the character <samp>"Ya"</samp> (`U+0A2F`). <samp>"Yakash"</samp> is positioned in a mark-like,
below-base form, but it must pass tests for consonants when
identifying syllables.

Gurmukhi differs from many other Indic scripts in that independent
vowels are represented by the standard dependent-vowel marks (matras)
attached to a special vowel-carrier character. However, because each
independent vowel has been assigned its own codepoint by Unicode, the
standard `VOWEL_INDEPENDENT` and `VOWEL_DEPENDENT` classifications
function normally.

The vowel carrier <samp>"Aira"</samp>, with no dependent-vowel mark, represents the
independent form of the inherent vowel, "A" (`U+0A05`).  In a sense,
this character serves a double function. 

The other two vowel carriers, <samp>"Iri"</samp> (`U+0A72`) and <samp>"Ura"</samp> (`U+0A73`),
do not normally occur on their own in Gurmukhi syllables, but they may
appear as standalone entities, much like marks and other symbols do
when they are referenced or displayed as examples. To support this use
case, the <samp>"Iri"</samp> and <samp>"Ura"</samp> characters have the status of consonants for
shaping purposes. 

<!--- Both subclasses should match tests for consonants, such as when [identifying
syllables](#1-identifying-syllables-and-other-sequences), but may
require special treatment in other circumstances. --->

Other characters, such as symbols and miscellaneous letters (for
example, letter-like symbols that only occur as standalone entities
and do not occur within syllables), need no special attention from the
shaping engine, so they are not assigned a shaping class.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType features that
might affect how the respective glyphs are drawn, such as `tnum`,
which specifies the usage of tabular-width numerals, and `sups`, which
replaces the default glyphs with superscript variants.

Marks and dependent vowels are further labeled with a mark-placement
subclass, which indicates where the glyph will be placed with respect
to the base character to which it is attached. The actual position of
the glyphs is determined by the lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr>
table; however, the shaping rules for Indic scripts require that the
shaping engine be able to identify marks by their general
position. 

For example, left-side dependent vowels (matras), classified
with `LEFT_POSITION`, must frequently be reordered, with the final
position determined by whether or not other letters in the syllable
have formed ligatures or combined into conjunct forms. Therefore, the
`LEFT_POSITION` subclass of the character must be tracked throughout
the shaping process.

There are four basic _mark-placement subclasses_ for dependent vowels
(matras). Each corresponds to the visual position of the matra with
respect to the syllable base to which it is attached:

  - `LEFT_POSITION` matras are positioned to the left of the syllable base.
  - `RIGHT_POSITION` matras are positioned to the right of the syllable base.
  - `TOP_POSITION` matras are positioned above the syllable base.
  - `BOTTOM_POSITION` matras are positioned below the syllable base.
  
These positions may also be referred to elsewhere in shaping documents as:

  - _Pre-base_ matras
  - _Post-base_ matras
  - _Above-base_ matras
  - _Below-base_ matras
  
respectively. The `LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

In addition, dependent-vowel codepoints that are composed of multiple
components will be designated in character tables as having a compound
_mark-placement subclass_, such as `TOP_AND_RIGHT` or `LEFT_AND_RIGHT`. 

However, these multi-part matras are decomposed into separate matra
components during the shaping process. After the decomposition, each
matra component will belong to exactly one of the four basic
_mark-placement subclasses_.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there are some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 

### Gurmukhi character tables ###

Separate character tables are provided for the Gurmukhi and Vedic
Extensions blocks as well as for other miscellaneous characters that
are used in `<gur2>` text runs:

  - [Gurmukhi character table](character-tables/character-tables-gurmukhi.md#gurmukhi-character-table)
  - [Vedic Extensions character table](character-tables/character-tables-gurmukhi.md#vedic-extensions-character-table)
  - [Miscellaneous character table](character-tables/character-tables-gurmukhi.md#miscellaneous-character-table)

The tables list each codepoint along with its Unicode general
category, its shaping class, and its mark-placement subclass. The
codepoint's Unicode name and an example glyph are also provided.

For example:

:::{table} example character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0A01`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0A01; Adak Bindi          |
| | | | |
|`U+0A15`   | Letter           | CONSONANT         | _null_                     | &#x0A15; Ka                  |
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine.

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the tables use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of Gurmukhi text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (such as <samp>"Reph"</samp>)


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a <samp>"_Consonant_,Halant,_Consonant_"</samp> sequence.

  - The sequence <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> blocks the
    formation of a conjunct between the two consonants. 

Note, however, that the <samp>"_Consonant_,Halant"</samp> subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead.

  - The sequence <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> should produce
    the first consonant in its standard form, followed by an explicit
    <samp>"Halant"</samp>. 

A secondary usage of the zero-width joiner is to prevent the formation of
<samp>"Reph"</samp>.

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence should not produce a <samp>"Reph"</samp>,
    even where an initial <samp>"Ra,Halant"</samp> sequence without the zero-width
    joiner would otherwise produce a <samp>"Reph"</samp>.

> Note: <samp>"Reph"</samp> substitutions are rare in Gurmukhi text. `<gur2>` fonts may
> not implement the <samp>"Reph"</samp> substitution in <abbr title="Glyph Substitution table">GSUB</abbr> at all. Nevertheless,
> shaping engines must test for it in order to provide the
> functionality if it is implemented.

The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.

For example:

  - A lookup that substitutes an alternate version of a
    dependent-vowel (matra) glyph when it is preceded by <samp>"Ka,Halant,Tta"</samp>
    should still be applied if the dependent-vowel codepoint is preceded
    by <samp>"Ka,Halant,ZWJ,Tta"</samp> in the text run.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match <samp>"NBSP,ZWJ,Halant,_Consonant_"</samp>, <samp>"NBSP,_mark_"</samp>, or <samp>"NBSP,_matra_"</samp>.

In addition to general punctuation, runs of Gurmukhi text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block.


## The `<gur2>` shaping model ##

Processing a run of `<gur2>` text involves six top-level stages:

1. Identifying syllables and other sequences
2. Initial reordering
3. Applying the basic substitution features from <abbr>GSUB</abbr>
4. Final reordering
5. Applying all remaining substitution features from <abbr>GSUB</abbr>
6. Applying all remaining positioning features from <abbr>GPOS</abbr>


As with other Indic scripts, the initial reordering stage and the
final reordering stage each involve applying a set of several
script-specific rules. The basic substitution features must be applied
to the run in a specific order. The remaining substitution features in
stage five, however, do not have a mandatory order.

Indic scripts follow many of the same shaping patterns, but they
differ in a few critical characteristics that the shaping engine must
track. These include:

  - The position of the base consonant in a syllable.
  
  - The final position of <samp>"Reph"</samp>.
  
  - Whether <samp>"Reph"</samp> must be requested explicitly or if it is formed by
    a specific, implicit sequence.
	
  - Whether the below-base forms feature is applied only to consonants
    before the syllable base, only to consonants after the base
    consonant, or to both.
	
  - The ordering positions for dependent vowels
    (matras). Specifically, right-side, above-base, and below-base
    matras follow different rules in different scripts. 
	All Indic scripts position left-side matras in the same
    manner, in the ordering position `POS_PREBASE_MATRA`. 

With regard to these common variations, Gurmukhi's specific shaping
characteristics include:

  - `BASE_POS_LAST` = The base consonant of a syllable is the last
     consonant, not counting any special final-consonant forms.

  - `REPH_POS_BEFORE_SUBJOINED` = <samp>"Reph"</samp> is ordered before all subjoined (i.e.,
     below-base) consonant forms.

  - `REPH_MODE_IMPLICIT` = <samp>"Reph"</samp> is formed by an initial <samp>"Ra,Halant"</samp> sequence.

  - `BLWF_MODE_PRE_AND_POST` = The below-base forms feature is applied both to
     pre-base consonants and to post-base consonants.

  - `MATRA_POS_TOP` = `POS_AFTER_POST` = Above-base matras are
     ordered after all post-base consonant forms.

  - `MATRA_POS_RIGHT` = `POS_AFTER_POST` = Right-side matras are
     ordered after all post-base consonant forms.

  - `MATRA_POS_BOTTOM` = `POS_AFTER_POST` = Below-base matras are
     ordered after all post-base consonant forms.

These characteristics determine how the shaping engine must reorder
certain glyphs, how base consonants are determined, and how <samp>"Reph"</samp>
should be encoded within a run of text.

### Stage 1: Identifying syllables and other sequences ###

A syllable in Gurmukhi consists of a valid orthographic sequence
that may be followed by a "tail" of modifier signs. 

> Note: The Gurmukhi Unicode block enumerates six modifier signs,
> "Adak Bindi" (`U+0A01`), "Bindi" (`U+0A02`), "Visarga" 
> (`U+0A03`), "Udaat" (`U+0A51`), "Tippi" (`U+0A70`), and "Addak"
> (`U+0A71`). In addition, Sanskrit text written in Gurmukhi may
> include additional signs from the Vedic Extensions block.

Each syllable contains exactly one vowel sound. Valid syllables may
begin with either a consonant or an independent vowel. 

If the syllable begins with a consonant, then the consonant that
provides the vowel sound is referred to as the "base" consonant. If
the syllable begins with an independent vowel, that independent vowel
is the syllable's only vowel sound and serves as the "base". 

> Note: A consonant that is not accompanied by a dependent vowel (matra) sign
> carries the script's inherent vowel sound. This vowel sound is changed
> by a dependent vowel (matra) sign following the consonant.

From the shaping engine's perspective, the main distinction between a
syllable with a base consonant and a syllable with an
independent-vowel base is that a syllable with an independent-vowel
base is less likely to include additional consonants in special forms
and less likely to include dependent vowel signs
(matras). Therefore, in the common case, vowel-based syllables may
involve less reordering, substitution feature applications, and other
processing than consonant-based syllables.

In some languages and orthographies, vowel-based syllables are
not permitted to include additional consonants or matras, and certain
<abbr title="Glyph Substitution table">GSUB</abbr> substitution features do not occur. However, there are often
known exceptions, and real-world text makes no such guarantees. 

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

Generally speaking, the base consonant is the final consonant of the
syllable and its vowel sound designates the end of the syllable. This
rule is synonymous with the `BASE_POS_LAST` characteristic mentioned
earlier. 

Valid consonant-based syllables may include one or more additional 
consonants that precede the base consonant or syllable base. Each of these
other, pre-base consonants will be followed by the <samp>"Halant"</samp> mark, which
indicates that they carry no vowel. They affect pronunciation by
combining with the base consonant or syllable base (e.g., "_str_", "_pl_") but they
do not add a vowel sound.

Gurmukhi also includes special consonants that can occur after the
base consonant or syllable base. These post-base consonants will also be separated from
the base consonant or syllable base by a <samp>"Halant"</samp> mark; the algorithm for correctly
identifying the base consonant includes a test to recognize these sequences
and not mis-identify the base consonant.

As with other Indic scripts, the consonant <samp>"Ra"</samp> receives special
treatment; in many circumstances it is replaced by one of two combining
mark-like forms. 

  - A <samp>"Ra,Halant"</samp> sequence at the beginning of a syllable may be replaced
    with an above-base mark called <samp>"Reph"</samp> (unless the <samp>"Ra"</samp> is the only
    consonant in the syllable). This rule is synonymous with the
    `REPH_MODE_IMPLICIT` characteristic mentioned earlier.

  - A <samp>"Ra,Halant"</samp> sequence before the base consonant or syllable base or a <samp>"Halant,Ra"</samp>
    sequence after the base consonant or syllable base may be replaced with a
    below-base mark.
  
> Note: <samp>"Reph"</samp> substitutions are rare in Gurmukhi text. `<gur2>` fonts may
> not implement the <samp>"Reph"</samp> substitution in <abbr title="Glyph Substitution table">GSUB</abbr> at all. Nevertheless,
> shaping engines must test for it in order to provide the
> functionality if it is implemented.

<samp>"Reph"</samp> characters must be reordered after the syllable-identification
stage is complete.

> Note: Generally speaking, OpenType fonts will implement support for
> any below-base, post-base, and pre-base-reordering consonant forms
> by including the necessary substitution rules in their `blwf`,
> `pstf`, and `pref` lookups in <abbr title="Glyph Substitution table">GSUB</abbr>.
>
> Consequently, whenever shaping engines need to determine whether or 
> not a given consonant can take on such a special form, the most
> appropriate test is to check if the consonant is included in the
> relevant <abbr title="Glyph Substitution table">GSUB</abbr> lookup. Other implementations are possible, such as
> maintaining static tables of consonants, but checking for <abbr title="Glyph Substitution table">GSUB</abbr>
> support ensures that the expected behavior is implemented in the
> active font, and is therefore the most reliable approach.


In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.

> Note: Foreign loanwords, when written in the Gurmukhi script, may
> not adhere to the syllable-formation rules described above. In
> particular, it is not uncommon to encounter foreign loanwords that
> contain a word-final suffix of consonants.
>
> Nevertheless, such word-final suffixes will be correctly matched by
> the regular expressions listed below. These loanwords are pronounced
> differently, which raises issues for potential readers, but the
> character sequences do not affect the shaping process.


Syllables should be identified by examining the run and matching
glyphs, based on their categorization, using regular expressions. 

The following general-purpose Indic-shaping regular expressions can be
used to match Gurmukhi syllables. 

The regular expressions utilize the shaping classes from the tables
above. For the purpose of syllable identification, more general
classes can be used, as defined in the following table. This
simplifies the resulting expressions. 

```markdown
_ra_		= The consonant "Ra" 
_consonant_	= ( `CONSONANT` | `CONSONANT_DEAD` ) - _ra_
_vowel_		= `VOWEL_INDEPENDENT`
_nukta_	  	= `NUKTA`
_halant_	= `VIRAMA`
_zwj_		= `JOINER`
_zwnj_		= `NON_JOINER`
_matra_		= `VOWEL_DEPENDENT` | `PURE_KILLER`
_syllablemodifier_	= `SYLLABLE_MODIFIER` | `BINDU` | `VISARGA` | `GEMINATION_MARK`
_vedicsign_	= `CANTILLATION`
_placeholder_	= `PLACEHOLDER` | `CONSONANT_PLACEHOLDER` | `NUMBER`
_dottedcircle_	= `DOTTED_CIRCLE`
_repha_		= `CONSONANT_PRE_REPHA`
_consonantmedial_	= `CONSONANT_MEDIAL`
_symbol_	= `SYMBOL` | `AVAGRAHA`
_consonantwithstacker_	= `CONSONANT_WITH_STACKER`
_other_		= `OTHER` | `MODIFYING_LETTER`
```


> Note: the _ra_ identification class is mutually exclusive with 
> the _consonant_ class. The union of the _consonant_ and _ra_ classes
> is used in the regular expression elements below in order to
> correctly identify <samp>"Ra"</samp> characters that do not trigger <samp>"Reph"</samp> or
> <samp>"Rakaar"</samp> shaping behavior.
>
> Note, also, that the cantillation mark "combining Ra" in the
> Devanagari Extended block does _not_ belong to the _ra_
> identification class, and that the other "combining consonant"
> cantillation marks in the Devanagari Extended block do not belong to
> the _consonant_ identification class.

> Note: The _placeholder_ identification class includes codepoints
> that are often used in place of vowels or consonants when a document
> needs to display a matra, mark, or special form in isolation or
> in another context beyond a standard syllable. Examples of
> _placeholder_ codepoints include hyphens and non-breaking
> spaces. Sequences that utilize this approach should be identified as
> "standalone" syllables.
>
> The _placeholder_ identification class also includes numerals, which
> are commonly used as word substitutes within normal text. Examples
> include ordinals (e.g., "4th").

> Note: The _other_ identification class includes codepoints that
> do not interact with adjacent characters for shaping purposes. Even
> though some of these codepoints (such as `MODIFYING_LETTER`) can
> occur within words, they evoke no behavior from the shaping
> engine and do not factor into the regular expressions that
> follow. Therefore, the shaping engine may choose to ignore them
> during syllable identification; they are listed here for completeness.

These identification classes form the bases of the following regular
expression elements:

```markdown
C	= _consonant_ | _ra_
Z	= _zwj_ | _zwnj_
REPH	= (_ra_ _halant_) | _repha_
CN		= C _zwj_? _nukta_?
FORCED_RAKAR	= _zwj_ _halant_ _zwj_ _ra_
S	= _symbol_ _nukta_?
MATRA_GROUP	= Z{0,3} _matra_ _nukta_? (_halant_ | FORCED_RAKAR)?
SYLLABLE_TAIL	= (Z? _syllablemodifier_ _syllablemodifier_? _zwnj_?)? _vedicsign_{0,3}
HALANT_GROUP	= Z? _halant_ (_zwj_ _nukta_?)?
FINAL_HALANT_GROUP	= HALANT_GROUP | (_halant_ _zwnj_)
MEDIAL_GROUP	= _consonantmedial_?
HALANT_OR_MATRA_GROUP	= (FINAL_HALANT_GROUP | MATRA_GROUP*)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(MATRA_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(MATRA_GROUP){0,4}` .


Using the above elements, the following regular expressions define the
possible syllable types:

A consonant-based syllable will match the expression:
```markdown
(_repha_|_consonantwithstacker_)? (CN HALANT_GROUP)* CN MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(CN HALANT_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(CN HALANT_GROUP){0,4}` .

A vowel-based syllable will match the expression:
```markdown
REPH? _vowel_ _nukta_? (_zwj_ | (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

A standalone syllable will match the expression:
```markdown
((_repha_|_consonantwithstacker_)? _placeholder_ | REPH? _dottedcircle_) _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

> Note: Although they are labeled as "standalone syllables" here,
> many sequences that match the standalone regular expression above
> are instances where a document needs to display a matra, combining
> mark, or special form in isolation. Such sequences might not have
> any significance with regard to the definition of syllables used in
> the language or orthography of the text.

A symbol-based syllable will match the expression:
```markdown
S SYLLABLE_TAIL
```

A broken syllable will match the expression:
```markdown
REPH? _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .


The primary problem involved in shaping broken syllables is the lack
of a syllable base (either a base consonant or an independent
vowel). Without a syllable base, the shaping engine cannot perform
<abbr title="Glyph Positioning table">GPOS</abbr> positioning and other contextual operations that are required
later in the shaping process.

To make up for this limitation, shaping engines should insert a
dotted-circle placeholder (`U+25CC`) character into the text stream
where the missing syllable base was expected to occur. This
placeholder allows the shaping process to proceed on a best-effort
basis at handling the broken-syllable sequence, but making guarantees
about the orthographic correctness or preferred appearance of the
final result is out of scope for this document.

Shaping engines can perform this dotted-circle insertion at any point
after the broken syllable has been recognized and before <abbr title="Glyph Substitution table">GSUB</abbr> features
are applied. However, the best results will likely be attained by
performing the insertion immediately, before proceeding to
stage 2. This will enable the maximum number of <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features
in the active font to be correctly applied to the text run by ensuring
that all reordering, tagging, and sorting algorithms are executed as
usual.

> Note: In software stacks where other text-handling operations, such
> as Unicode normalization and localization, are performed before the
> text run is passed to the shaping engine, there is a potential for
> the dotted-circle insertion to cause unexpected effects.
>
> For example, if a `ccmp` or `locl` feature substitutes the default
> dotted-circle placeholder glyph with a variant glyph of a different
> size or weight for the (`U+25CC`) codepoint, then any shaping engine
> which relies on another software component to handle that
> functionality must take additional care to ensure consistency.


The expressions above use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


After the syllables have been identified, each of the subsequent 
shaping stages occurs on a per-syllable basis.

### Stage 2: Initial reordering ###

The initial reordering stage is used to relocate glyphs from the
phonetic order in which they occur in a run of text to the
orthographic order in which they are presented visually.

> Note: Primarily, this means moving dependent-vowel (matra) glyphs, 
> <samp>"Ra,Halant"</samp> glyph sequences, and other consonants that take special
> treatment in some circumstances. This includes the below-base forms
> of <samp>"Ra"</samp>, <samp>"Ha"</samp>, and <samp>"Va"</samp>.
>
> These reordering moves are mandatory. The final-reordering stage
> may make additional moves, depending on the text and on the features
> implemented in the active font.

The syllable should be processed by tagging each glyph with its
intended position based on its ordering category. After all glyphs
have been tagged, the entire syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.

The final sort order of the ordering categories should be:


	POS_RA_TO_BECOME_REPH
	POS_PREBASE_MATRA
	POS_PREBASE_CONSONANT

	POS_SYLLABLE_BASE
	POS_AFTER_MAIN

	POS_ABOVEBASE_CONSONANT

	POS_BEFORE_SUBJOINED
	POS_BELOWBASE_CONSONANT
	POS_AFTER_SUBJOINED

	POS_BEFORE_POST
	POS_POSTBASE_CONSONANT
	POS_AFTER_POST

	POS_FINAL_CONSONANT
	POS_SMVD


This sort order enumerates all of the possible final positions to
which a codepoint might be reordered, across all of the Indic
scripts. It includes some ordering categories not utilized in
Gurmukhi. 

The basic positions (left to right) are <samp>"Reph"</samp>
(`POS_RA_TO_BECOME_REPH`), dependent vowels (matras) and consonants
positioned before the base consonant or syllable base
(`POS_PREBASE_MATRA` and `POS_PREBASE_CONSONANT`), the base consonant
or syllable base (`POS_SYLLABLE_BASE`), above-base consonants
(`POS_ABOVEBASE_CONSONANT`), below-base consonants
(`POS_BELOWBASE_CONSONANT`), consonants positioned after the base
consonant or syllable base (`POS_POSTBASE_CONSONANT`), syllable-final
consonants (`POS_FINAL_CONSONANT`), and syllable-modifying or Vedic
signs (`POS_SMVD`).

In addition, several secondary positions are defined to handle various
reordering rules that deal with relative, rather than absolute,
positioning. `POS_AFTER_MAIN` means that a character must be
positioned immediately after the syllable base. `POS_BEFORE_SUBJOINED`
and `POS_AFTER_SUBJOINED` mean that a character must be positioned
before or after any below-base consonants, respectively. Similarly,
`POS_BEFORE_POST` and `POS_AFTER_POST` mean that a character must be
positioned before or after any post-base consonants, respectively. 

For shaping-engine implementers, the names used for the ordering
categories matter only in that they are unambiguous. 

For a definition of the "base" consonant, refer to stage 2, step 1, which
follows.


#### Stage 2, step 1: Base consonant ####

The first step is to determine the base consonant of the syllable, if
there is one, and tag it as `POS_SYLLABLE_BASE`.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base, and it should be tagged
as `POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a standalone sequence or other syllable that begins with a placeholder
or dotted circle, the placeholder or dotted circle will always serve
as the syllable base, and it should be tagged as
`POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a syllable that begins with a consonant, the shaping engine must
determine the base consonant by a script-specific algorithm.

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

The base consonant is defined as the consonant in a consonant-based
syllable that carries the syllable's vowel sound. That vowel sound
will either be provided by the script's inherent vowel (in which case
it is not written with a separate character) or the sound will be designated
by the addition of a dependent-vowel (matra) sign.


<!--- > Because vowel-based syllables will not include consonants and
> because independent vowels do not take on special forms or require
> reordering, many of the steps that follow will involve no
> work for a vowel-based syllable. However, vowel-based syllables must
> still be sorted and their marks handled correctly, and <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr>
> lookups must be applied. These steps of the shaping process follow
> the same rules that are employed for consonant-based syllables.
--->

While performing the base-consonant search, shaping engines may
also encounter special-form consonants, including below-base
consonants and post-base consonants. Each of these special-form
consonants must also be tagged (`POS_BELOWBASE_CONSONANT`,
`POS_POSTBASE_CONSONANT`, respectively). 

Any pre-base-reordering consonant (such as a pre-base-reordering <samp>"Ra"</samp>)
encountered during the base-consonant search must be tagged
`POS_POSTBASE_CONSONANT`. 
 
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.


The algorithm for determining the base consonant is:

  - If the syllable starts with <samp>"Ra,Halant"</samp> and the syllable contains
    more than one consonant, exclude the starting <samp>"Ra"</samp> from the list of
    consonants to be considered. 
  - Starting from the end of the syllable, move backwards until a consonant is found.
      * If the consonant is the first consonant, stop.
      * If the consonant is preceded by the sequence <samp>"Halant,ZWJ"</samp>, stop.
      * If the consonant has a below-base form, tag it as
        `POS_BELOWBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant has a post-base form, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant is a pre-base-reordering <samp>"Ra"</samp>, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If none of the above conditions is true, stop.
  - The consonant stopped at will be the base consonant.

> Note: The algorithm is designed to work for all Indic
> scripts. However, Gurmukhi does not utilize pre-base-reordering <samp>"Ra"</samp>.

Gurmukhi includes one post-base form:

  - <samp>"Halant,Ya"</samp> takes on a post-base form.
  
:::{figure-md}
![Post-base consonants](/images/gurmukhi/gurmukhi-pstf.svg "Post-base consonants"){.shaping-demo .inline-svg .greyscale-svg #gurmukhi-pstf}

Post-base consonants
:::

```{svg-color-toggle-button} gurmukhi-pstf
```


Gurmukhi includes three below-base consonant forms:

  - <samp>"Halant,Ra"</samp> (after the base consonant or syllable base) and <samp>"Ra,Halant"</samp> (in a
    non-syllable-initial position) take on a below-base form.
  - <samp>"Halant,Ha"</samp> (after the base consonant or syllable base) and <samp>"Ha,Halant"</samp> (in a
    non-syllable-initial position) take on a below-base form. 
  - <samp>"Halant,Va"</samp> (after the base consonant or syllable base) and <samp>"Va,Halant"</samp> (in a
    non-syllable-initial position) take on a below-base form. 

Gurmukhi also includes the CONSONANT_MEDIAL subclass, used only for <samp>"Yakash"</samp>
(U+0A75), which is rendered as a below-base form. <samp>"Yakash"</samp> should
be tagged as `POS_BELOWBASE_CONSONANT`.

> Note: Because Gurmukhi employs the `BLWF_MODE_PRE_AND_POST` shaping
> characteristic, consonants with below-base special forms may occur
> before or after the syllable base. 
> 
> During the base-consonant search, only the <samp>"Halant,_consonant_"</samp> 
> pattern following the syllable base for these below-base forms will
> be encountered. Stage 2, step 5 below ensures that the <samp>"_consonant_,Halant"</samp>
> pattern preceding the syllable base for these below-base forms will
> also be tagged correctly.


#### Stage 2, step 2: Matra decomposition ####

Second, any multi-part dependent vowels (matras) must be decomposed
into their left-side and right-side components. Gurmukhi has no
two-part dependent vowels, so this step will involve no work when
processing `<gur2>` text. It is included here in order to maintain
compatibility with the other Indic scripts.

Because this decomposition is a character-level operation, the shaping
engine may choose to perform it earlier, such as during an initial
Unicode-normalization stage. However, all such decompositions must be
completed before the shaping engine begins step three, below.

#### Stage 2, step 3: Tag matras ####

Third, all left-side dependent-vowel (matra) signs, including those that
resulted from the preceding decomposition step, must be tagged to be
moved to the beginning of the syllable, with `POS_PREBASE_MATRA`.

All above-base dependent-vowel (matra) signs are tagged
`POS_AFTER_POST`.

All right-side dependent-vowel (matra) signs are tagged
`POS_AFTER_POST`.

All below-base dependent-vowel (matra) signs are tagged
`POS_AFTER_POST`.

For simplicity, shaping engines may choose to tag single-part matras
in an earlier text-processing step, using the information in the
_Mark-placement subclass_ column of the character tables. It is
critical at this step, however, that all decomposed matras are also
correctly tagged before proceeding to the next step.

#### Stage 2, step 4: Adjacent marks ####

Fourth, any subsequences of marks that include a <samp>"Nukta"</samp> and a
<samp>"Halant"</samp> or Vedic sign must be reordered so that the <samp>"Nukta"</samp> appears
first.

This means that the subsequence <samp>"Halant,Nukta"</samp> is reordered to
<samp>"Nukta,Halant"</samp> and that the subsequence <samp>"_Vedic_sign_,Nukta"</samp> is
reordered to <samp>"Nukta,_Vedic_sign_"</samp>.

For subsequences of affected marks that are longer than two, the
reordering operation must be repeated until the <samp>"Nukta"</samp> is the first
character in the subsequence. No other marks in the subsequence
should be reordered.

This order is canonical in Unicode and is required so that
<samp>"_consonant_,Nukta"</samp> substitution rules from <abbr title="Glyph Substitution table">GSUB</abbr> will be correctly
matched later in the shaping process.

#### Stage 2, step 5: Pre-base consonants ####

Fifth, consonants that occur before the syllable base must be
tagged. Excluding initial <samp>"Ra,Halant"</samp> sequences that will become
<samp>"Reph"</samp>s, each pre-base consonant is tagged as follows:

  - If the consonant has a below-base form, tag it as
          `POS_BELOWBASE_CONSONANT`. 
  - Otherwise, tag it as `POS_PREBASE_CONSONANT`.
  
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.

Gurmukhi includes three below-base consonant forms:

  - <samp>"Halant,Ra"</samp> (after the base consonant or syllable base) and <samp>"Ra,Halant"</samp> (in a
    non-syllable-initial position) take on a below-base form.
  - <samp>"Halant,Ha"</samp> (after the base consonant or syllable base) and <samp>"Ha,Halant"</samp> (in a
    non-syllable-initial position) take on a below-base form. 
  - <samp>"Halant,Va"</samp> (after the base consonant or syllable base) and <samp>"Va,Halant"</samp> (in a
    non-syllable-initial position) take on a below-base form. 

> Note: Because Gurmukhi employs the `BLWF_MODE_PRE_AND_POST` shaping
> characteristic, consonants with below-base special forms may occur
> before or after the syllable base. 
> 
> During the base-consonant search in stage 2, step 1, any instances of
> the <samp>"Halant,_consonant_"</samp> pattern following the syllable base for
> these below-base forms will be encountered. The tagging in this step
> ensures that the <samp>"_consonant_,Halant"</samp> pattern preceding the syllable
> base for these below-base forms will also be tagged correctly.


#### Stage 2, step 6: Reph ####

Sixth, initial <samp>"Ra,Halant"</samp> sequences that will become <samp>"Reph"</samp>s must be tagged with
`POS_RA_TO_BECOME_REPH`.

> Note: An initial <samp>"Ra,Halant"</samp> sequence will always become a <samp>"Reph"</samp>
> unless the <samp>"Ra"</samp> is the only consonant in the syllable.

> Note: <samp>"Reph"</samp> substitutions are rare in Gurmukhi text. `<gur2>` fonts may
> not implement the <samp>"Reph"</samp> substitution in <abbr title="Glyph Substitution table">GSUB</abbr> at all. Nevertheless,
> shaping engines must test for it in order to provide the
> functionality if it is implemented.

#### Stage 2, step 7: Final consonants ####

Seventh, all final consonants must be tagged. Consonants that occur
after the syllable base _and_ after a dependent vowel (matra) sign
must be tagged with  `POS_FINAL_CONSONANT`.

> Note: Final consonants occur only in Sinhala and should not be
> expected in `<gur2>` text runs. This step is included here to
> maintain compatibility across Indic scripts.


#### Stage 2, step 8: Mark tagging ####

Eighth, all marks must be tagged. 

> Note: In this step, joiner and non-joiner characters must also be
> tagged according to the same rules given for marks, even though
> these characters are not categorized as marks in Unicode.

Marks in the `BINDU`, `VISARGA`, `AVAGRAHA`, `CANTILLATION`,
`SYLLABLE_MODIFIER`, `GEMINATION_MARK`, and `SYMBOL` categories should
be tagged with `POS_SMVD`. 

All <samp>"Nukta"</samp>s must be tagged with the same positioning tag as the
preceding consonant, independent vowel, placeholder, or dotted circle.

All remaining marks (not in the `POS_SMVD` category and not <samp>"Nukta"</samp>s)
must be tagged with the same positioning tag as the closest non-mark
character the mark has affinity with, so that they move together 
during the sorting step.

There are two possible cases: those marks before the syllable base
and those marks after the syllable base. In addition, an exception is
made for <samp>"Halant"</samp> marks that follow a left-side (pre-base) matra.

  1. Initially, all remaining marks should be tagged with the same
	 positioning tag as the closest preceding consonant.

  2. For each consonant after the syllable base (such as post-base
	 consonants, below-base consonants, or final consonants), all
	 remaining marks located between that current consonant and any
	 previous consonant should be tagged with the same positioning tag as
	 the current (later) consonant.
  
     In other words, all consonants preceding the syllable base "own" the
	 marks that follow them, while all consonants after the syllable base
	 "own" the marks that come before them. When a syllable does not have
	 any consonants after the syllable base, the syllable base should
	 "own" all the marks that follow it.
  
  3. Finally, <samp>"Halant"</samp> marks that follow a left-side dependent vowel
     (matra) should _not_ be tagged with the left-side matra's
     positioning tag. Instead, the <samp>"Halant"</samp> should be tagged with the
     positioning tag of the non-mark character preceding the left-side
     matra. This prevents the <samp>"Halant"</samp> mark from being moved with the
     left-side matra when the syllable is sorted.


#### Stage 2, step 9: Sort syllable ####

With these steps completed, the syllable can be sorted into the final
sort order as listed at the beginning of stage 2.

The glyphs in the syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.


#### Stage 2, step 10: Flag sequences for possible feature applications ####

With the initial reordering complete, those glyphs in the syllable that
may have <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features applied in stages 3, 5, and 6 should be
flagged for each potential feature. 

This flagging is preliminary; the set of potential features varies
between different scripts and which features are supported varies
between fonts. It is also possible that the application of
one feature on a glyph sequence will perform a substitution that makes
a later feature no longer applicable to the updated sequence.

Consequently, the flagging must be completed before shaping proceeds
to the stages during which features are applied.

Some shaping features, such as `locl`, can potentially apply to any
glyphs. Therefore it is not necessary to maintain a separate flag for
these features in the bitmask (or other data structure) used to track
the flags -- although shaping engines may do so if desired.

The sequences to flag are summarized in the list below; a full
description of each feature's function and interpretation is provided
in the <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> application stages that follow.

  - `nukt` should match <samp>"_Consonant_,Nukta"</samp> sequences
  - `akhn` should match <samp>"Ka,Halant,Ssa"</samp> and <samp>"Ja,Halant,Nya"</samp>
  - `rphf` should match initial <samp>"Ra,Halant"</samp> sequences but _not_ match
            initial <samp>"Ra,Halant,ZWJ"</samp> sequences
  - `blwf` should match <samp>"Halant,Ra"</samp>, <samp>"Halant,Ha"</samp>, and <samp>"Halant,Va"</samp> in
            post-base positions and <samp>"Ra,Halant"</samp>, <samp>"Ha,Halant"</samp>, and
            <samp>"Va,Halant"</samp> in non-initial pre-base positions
  - `half` should match <samp>"_Consonant_,Halant"</samp> in pre-base position but
           _not_ match <samp>"Ra,Halant"</samp> sequences flagged for `rphf` and
           _not_ match <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequences
  - `pstf` should match <samp>"Halant,Ya"</samp> in post-base position
  - `vatu` should match <samp>"_Consonant_,Halant,Ra"</samp>,
           <samp>"_Consonant_,Halant,Ha"</samp>, and <samp>"_Consonant_,Halant,Va"</samp>
  - `cjct` should match <samp>"_Consonant_,Halant,_Consonant_"</samp> but _not_
            match <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
            <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp>


### Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should be flagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2, step 10.

The order in which these substitutions must be performed is fixed for
all Indic scripts:

	locl
	nukt
	akhn
	rphf 
	rkrf (not used in Gurmukhi)
	pref (not used in Gurmukhi)
	blwf 
	abvf (not used in Gurmukhi)
	half
	pstf
	vatu
	cjct
	cfar (not used in Gurmukhi)

#### Stage 3, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

#### Stage 3, step 2: nukt ####

The `nukt` feature replaces <samp>"_Consonant_,Nukta"</samp> sequences with a
precomposed nukta-variant of the consonant glyph. 

  - The context defined for a `nukt` feature is:

:::{table} `nukt` feature context
    
| Backtrack     | Matching sequence             | Lookahead     |
|:--------------|:------------------------------|:--------------|
| _none_        | `_consonant_`(full),`_nukta_` | _none_        |
:::


:::{figure-md}
![Nukta composition](/images/gurmukhi/gurmukhi-nukt.svg "Nukta composition"){.shaping-demo .inline-svg .greyscale-svg #gurmukhi-nukt}

Nukta composition
:::

```{svg-color-toggle-button} gurmukhi-nukt
```


#### Stage 3, step 3: akhn ####

The `akhn` feature replaces two specific sequences with required ligatures. 

  - <samp>"Ka,Halant,Ssa"</samp> is substituted with the <samp>"KSsa"</samp> ligature. 
  - <samp>"Ja,Halant,Nya"</samp> is substituted with the <samp>"JNya"</samp> ligature. 
  
These sequences can occur anywhere in a syllable. The <samp>"KSsa"</samp> and
<samp>"JNya"</samp> characters have orthographic status equivalent to full
consonants in some languages, and fonts may have `cjct` substitution
rules designed to match them in subsequences. Therefore, this
feature must be applied before all other many-to-one substitutions.

  - The context defined for an `akhn` feature is:

:::{table} `akhn` feature context
    
| Backtrack     | Matching sequence           | Lookahead     |
|:--------------|:----------------------------|:--------------|
| _none_        | `AKHAND_CONSONANT_SEQUENCE` | _none_        |
:::


> Note: Akhand ligatures are rare in Gurmukhi text. Nevertheless,
> shaping engines must test for the feature in order to provide the
> functionality if it is implemented.


#### Stage 3, step 4: rphf ####

The `rphf` feature replaces initial <samp>"Ra,Halant"</samp> sequences with the
<samp>"Reph"</samp> glyph.

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence, however, must not be flagged for
    the `rphf` substitution.
	

  - The context defined for a `rphf` feature is:

:::{table} `rphf` feature context
    
| Backtrack        | Matching sequence       | Lookahead     |
|:-----------------|:------------------------|:--------------|
| `SYLLABLE_START` | "Ra"(full),`_halant_`   | _none_        |
:::


> Note: <samp>"Reph"</samp> usage is rare in Gurmukhi text. Nevertheless,
> shaping engines must test for the feature in order to provide the
> functionality if it is implemented.


#### Stage 3, step 5: rkrf ####

> This feature is not used in Gurmukhi.

#### Stage 3, step 6: pref ####

> This feature is not used in Gurmukhi.


#### Stage 3, step 7: blwf ####

The `blwf` feature replaces below-base-consonant glyphs with any
special forms. Gurmukhi includes three below-base consonant
forms:

  - <samp>"Halant,Ra"</samp> (after the base consonant or syllable base) and <samp>"Ra,Halant"</samp> (in a
    non-syllable-initial position) take on a below-base form.
  - <samp>"Halant,Ha"</samp> (after the base consonant or syllable base) and <samp>"Ha,Halant"</samp> (in a
    non-syllable-initial position) take on a below-base form. 
  - <samp>"Halant,Va"</samp> (after the base consonant or syllable base) and <samp>"Va,Halant"</samp> (in a
    non-syllable-initial position) take on a below-base form. 

Because Gurmukhi incorporates the `BLWF_MODE_PRE_AND_POST` shaping
characteristic, any pre-base consonants and any post-base consonants
may potentially match a `blwf` substitution; therefore, both cases must
be flagged for comparison. Note that this is not necessarily the case in other
Indic scripts that use a different `BLWF_MODE_` shaping
characteristic. 


:::{figure-md}
![Below-base Ra composition](/images/gurmukhi/gurmukhi-blwf-ra.svg "Below-base Ra composition"){.shaping-demo .inline-svg .greyscale-svg #gurmukhi-blwf-ra}

Below-base Ra composition
:::

```{svg-color-toggle-button} gurmukhi-blwf-ra
```


:::{figure-md}
![Below-base Va composition](/images/gurmukhi/gurmukhi-blwf-va.svg "Below-base Va composition"){.shaping-demo .inline-svg .greyscale-svg #gurmukhi-blwf-va}

Below-base Va composition
:::

```{svg-color-toggle-button} gurmukhi-blwf-va
```


:::{figure-md}
![Below-base Ha composition](/images/gurmukhi/gurmukhi-blwf-ha.svg "Below-base Ha composition"){.shaping-demo .inline-svg .greyscale-svg #gurmukhi-blwf-ha}

Below-base Ha composition
:::

```{svg-color-toggle-button} gurmukhi-blwf-ha
```


#### Stage 3, step 8: abvf ####

> This feature is not used in Gurmukhi.

#### Stage 3, step 9: half ####

The `half` feature replaces <samp>"_Consonant_,Halant"</samp> sequences before the
base consonant or syllable base with "half forms" of the consonant
glyphs.

In the most common case, this substitution applies to
<samp>"_Consonant_,Halant"</samp> sequences that are followed by another
<samp>"_Consonant_"</samp>.

In addition, a sequence matching <samp>"_Consonant_,Halant,ZWJ"</samp> must also be
flagged for potential `half` substitutions.

> Note: The presence of the <samp>"ZWJ"</samp> at the end of the sequence means
> that the sequence may match the regular-expression test in stage 1
> as the end of a syllable, even without being followed by a base
> consonant or syllable base.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.

There are three exceptions to the default behavior, for which
the shaping engine must test:

  - Initial <samp>"Ra,Halant"</samp> sequences, which should have been flagged for
    the `rphf` feature earlier, must not be flagged for potential
    `half` substitutions.

  - Non-initial <samp>"Ra,Halant"</samp>, <samp>"Ha,Halant"</samp>, and <samp>"Va,Halant"</samp> sequences,
    which should have been flagged for the `rkrf` or `blwf` features
    earlier, must not be flagged for potential `half` substitutions.

  - A sequence matching <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be
    flagged for potential `half` substitutions.

> Note: Half forms are rare in Gurmukhi text. Fonts supporting
> `<gur2>` may implement the `half` feature using explicit <samp>"Halant"</samp>
> glyphs, as illustrated here.

:::{figure-md}
![Half-form composition](/images/gurmukhi/gurmukhi-half.svg "Half-form composition"){.shaping-demo .inline-svg .greyscale-svg #gurmukhi-half}

Half-form composition
:::

```{svg-color-toggle-button} gurmukhi-half
```


#### Stage 3, step 10: pstf ####

The `pstf` feature replaces post-base-consonant glyphs with any special forms.

Gurmukhi includes one post-base form:

  - <samp>"Halant,Ya"</samp> takes on a post-base form.

:::{figure-md}
![Post-base Ya composition](/images/gurmukhi/gurmukhi-pstf-1.svg "Post-base Ya composition"){.shaping-demo .inline-svg .greyscale-svg #gurmukhi-pstf-1}

Post-base Ya composition
:::

```{svg-color-toggle-button} gurmukhi-pstf-1
```


#### Stage 3, step 11: vatu ####

The `vatu` feature replaces certain sequences with "Vattu variant"
forms. 

"Vattu variants" are formed from glyphs followed by the below-base
form of <samp>"Ra"</samp>, <samp>"Ha"</samp>, or <samp>"Va"</samp>; therefore, this feature must be applied after
the `blwf` feature.

> Note: Vattu variants are rare in Gurmukhi text. Nevertheless,
> shaping engines must test for the feature in order to provide the
> functionality if it is implemented.

#### Stage 3, step 12: cjct ####

The `cjct` feature replaces sequences of adjacent consonants with
conjunct ligatures. These sequences must match <samp>"_Consonant_,Halant,_Consonant_"</samp>.

A sequence matching <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
<samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be flagged to form a conjunct.

> Note: The presence of the <samp>"ZWJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> sequence should automatically
> inhibit any `cjct` feature rules from matching the sequence as valid
> input, and thus prevent the `cjct` substitution from being applied.

> Note: The presence of the <samp>"ZWNJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequence means that the
> <samp>"_Consonant_,Halant,ZWNJ"</samp> subsequence will match the
> regular-expression test in stage 1 as the end of a syllable.
> 
> Because OpenType shaping features in `<gur2>` are defined as
> applying only within an individual syllable, this means that the
> presence of the <samp>"ZWNJ"</samp> will automatically prevent the application of
> a `cjct` feature by triggering the identification of a syllable
> break between the two consonants.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWNJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.
>
> Note, also: The presence of the <samp>"ZWJ"</samp> means that a
> <samp>"_Consonant_,Halant,ZWJ"</samp> sequence may match the regular-expression
> test in stage 1 as the end of a syllable, even without being
> followed by a base consonant or syllable base. By definition,
> however, a <samp>"_Consonant_,Halant,ZWJ"</samp> syllable identified in stage 1
> cannot also include a <samp>"_Consonant_"</samp> after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.

The font's <abbr title="Glyph Substitution table">GSUB</abbr> rules might be implemented so that `cjct`
substitutions apply to half-form consonants; therefore, this feature
must be applied after the `half` feature. 

> Note: Conjunct forms are rare in Gurmukhi text. Nevertheless,
> shaping engines must test for the feature in order to provide the
> functionality if it is implemented.

#### Stage 3, step 13: cfar ####

> This feature is not used in Gurmukhi.


### Stage 4: Final reordering ###

The final reordering stage repositions marks, dependent-vowel (matra)
signs, and <samp>"Reph"</samp> glyphs to the appropriate location with respect to
the base consonant or syllable base. Because multiple substitutions
may have occurred during the application of the basic-shaping features
in the preceding stage, these repositioning moves could not be
performed during the initial reordering stage.

Like the initial reordering stage, the steps involved in this stage
occur on a per-syllable basis.

<!--- Check that classifications have not been mangled. If the --->
<!--- character is a Halant AND a ligature was formed AND a multiple
substitution was performed, restore the classification to VIRAMA
because it was almost certainly lost in the preceding <abbr title="Glyph Substitution table">GSUB</abbr> stage.
--->

#### Stage 4, step 1: Base consonant ####

The final reordering stage, like the initial reordering stage, begins
with determining the syllable base of each syllable, following the
same algorithm used in stage 2, step 1.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base. In a standalone sequence or
other syllable that begins with a placeholder or a dotted circle, the
placeholder or dotted circle will always serve as the syllable base.

In a syllable that begins with a consonant, the shaping engine must
repeat the base-consonant search algorithm used in stage 2, step 1.

The codepoint of the underlying base consonant or syllable base will
not change between the search performed in stage 2, step 1, and the
search repeated here. However, the application of <abbr title="Glyph Substitution table">GSUB</abbr> shaping
features in stage 3 means that several ligation and many-to-one
substitutions may have taken place. The final glyph produced by that
process may, therefore, be a conjunct or ligature form — in most
cases, such a glyph will not have an assigned Unicode codepoint.
   
#### Stage 4, step 2: Pre-base matras ####

Pre-base dependent vowels (matras) that were reordered during the
initial reordering stage must be moved to their final position. This
position is defined as:
   
   - after the last standalone <samp>"Halant"</samp> glyph that comes after the
     matra's starting position and also comes before the base
     consonant or syllable base.
   - If a zero-width joiner follows this last standalone <samp>"Halant"</samp>, the
     final matra position is moved to after the joiner.

This means that the matra will move to the right of all explicit
<samp>"consonant,Halant"</samp> subsequences, but will stop to the left of the base
consonant or syllable base, all conjuncts or ligatures that contain
the base consonant or syllable base, and all half forms.

:::{figure-md}
![Pre-base matra positioning](/images/gurmukhi/gurmukhi-matra-position.svg "Pre-base matra positioning"){.shaping-demo .inline-svg .greyscale-svg #gurmukhi-matra-position}

Pre-base matra positioning
:::

```{svg-color-toggle-button} gurmukhi-matra-position
```


> Note: OpenType and Unicode both state that if the syllable includes
> a <abbr title="Zero-Width Joiner">ZWJ</abbr> immediately after the last <samp>"Halant"</samp>, then the final matra
> position should be after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.
>
> However, there are several test sequences indicating that
> Microsoft's Uniscribe shaping engine did not follow this rule (in,
> at least, Devanagari and Bengali text), and in these circumstances
> Uniscribe instead makes the final matra position before the final
> <samp>"Consonant,Halant,ZWJ"</samp>.
>
> Subsequently, the HarfBuzz shaping engine has also followed the same
> pattern. If other shaping engine implementations prefer to maintain
> maximum compatibility with Uniscribe and HarfBuzz, then they should
> also follow suit.

> Note: The Microsoft script-development specifications for OpenType
> shaping also state that if a zero-width non-joiner follows the last
> standalone <samp>"Halant"</samp>, the final matra position is moved to after the
> non-joiner. However, it is unnecessary to test for this condition,
> because a <samp>"Halant,ZWNJ"</samp> subsequence is, by definition, the end of a
> syllable. Consequently, a <samp>"Halant,ZWNJ"</samp> cannot be followed by a
> pre-base dependent vowel.


#### Stage 4, step 3: Reph ####

<samp>"Reph"</samp> must be moved from the beginning of the syllable to its final
position. Because Gurmukhi incorporates the `REPH_POS_BEFORE_SUBJOINED`
shaping characteristic, this final position is defined to be
immediately after the syllable base and before any subjoined
(below-base consonant or below-base dependent vowel) forms.

The algorithm for finding the final <samp>"Reph"</samp> position is:

  - Starting at the first post-<samp>"Reph"</samp> consonant, search forward looking
    for the first explicit <samp>"Halant"</samp>, ending the search when the base
    consonant is encountered. If such an explicit <samp>"Halant"</samp> is found,
    move the <samp>"Reph"</samp> to the position immediately after this
    <samp>"Halant"</samp>.
	  * If a zero-width joiner (<abbr>ZWJ</abbr>) or a zero-width non-joiner (<abbr>ZWNJ</abbr>)
        follows this <samp>"Halant"</samp>, move the <samp>"Reph"</samp> to the position
        immediately after the <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>. This will be the final
        <samp>"Reph"</samp> position. 
	  * If no <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> follows this <samp>"Halant"</samp>, leave the <samp>"Reph"</samp> in
        its position immediately after the <samp>"Halant"</samp>. This will be the
        final <samp>"Reph"</samp> position. 
  - If no such explicit <samp>"Halant"</samp> is found in the previous step, find
    the first post-base consonant that has not formed a ligature with
    the base consonant. If such a non-ligated post-base consonant is
    found, move the <samp>"Reph"</samp> to the position immediately before the
    non-ligated post-base consonant. This will be the final <samp>"Reph"</samp>
    position.
  - If no such non-ligated post-base consonant is found in the
    previous step, move the <samp>"Reph"</samp> to the position immediately before
    the first post-base matra, syllable modifier, or Vedic sign that
    has a positioning tag after the script's <samp>"Reph"</samp> position in the
    syllable sort order (as listed in [stage
    2](#stage-2-initial-reordering)). This will be the final <samp>"Reph"</samp>
    position. 
	> Note: Because Gurmukhi incorporates the
    > `REPH_POS_BEFORE_SUBJOINED` shaping characteristic, this means
    > any positioning tag of `POS_BELOWBASE_CONSONANT` or later,
    > although a post-base matra, syllable modifier, or Vedic sign
    > would not typically be tagged with `POS_BELOWBASE_CONSONANT`.
  - If no other location has been located in the previous steps, move
    the <samp>"Reph"</samp> to the end of the syllable.


Finally, if the final position of <samp>"Reph"</samp> occurs after a
<samp>"_matra_,Halant"</samp> subsequence, then <samp>"Reph"</samp> must be repositioned to the
left of <samp>"Halant"</samp>, to allow for potential matching with `abvs` or
`psts` substitutions from <abbr title="Glyph Substitution table">GSUB</abbr>.


<!---
  - If the syllable does not have a base consonant (such as a syllable
    based on an independent vowel), then the final <samp>"Reph"</samp> position is
    immediately before the first character tagged with the
    `POS_BEFORE_POST` position or any later position in the sort
    order.

    -- If there are no characters tagged with `POS_BEFORE_POST` or
       later positions, then <samp>"Reph"</samp> is positioned at the end of the
       syllable.

Finally, if the final position of <samp>"Reph"</samp> occurs after a
<samp>"_matra_,Halant"</samp> subsequence, then <samp>"Reph"</samp> must be repositioned to the
left of <samp>"Halant"</samp>, to allow for potential matching with `abvs` or
`psts` substitutions from <abbr title="Glyph Substitution table">GSUB</abbr>.
--->
#### Stage 4, step 4: Pre-base-reordering consonants ####

Any pre-base-reordering consonants must be moved to immediately before
the base consonant or syllable base.
  
Gurmukhi does not use pre-base-reordering consonants, so this step will
involve no work when processing `<gur2>` text. It is included here in order
to maintain compatibility with the other Indic scripts.


#### Stage 4, step 5: Initial matras ####

Any left-side dependent vowels (matras) that are at the start of a
word must be flagged for potential substitution by the `init` feature
of <abbr title="Glyph Substitution table">GSUB</abbr>.

Gurmukhi does not use the `init` feature, so this step will
involve no work when processing `<gur2>` text. It is included here in
order to maintain compatibility with the other Indic scripts.


### Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr> ###

In this stage, the remaining substitution features from the <abbr title="Glyph Substitution table">GSUB</abbr> table
are applied. In preparation for this stage, glyph sequences should
already have been flagged, in stage 2, step 10, for possible
application of these <abbr title="Glyph Substitution table">GSUB</abbr> features.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	init
	pres
	abvs
	blws
	psts
	haln

The `init` feature replaces word-initial glyphs with special
presentation forms. Generally, these forms involve removing the
headline in-stroke from the left side of the glyph.

The `pres` feature replaces pre-base-consonant glyphs with special
presentation forms. This can include consonant conjuncts, half-form
consonants, and stylistic variants of left-side dependent vowels
(matras). 

The `abvs` feature replaces above-base-consonant glyphs with special
presentation forms. This usually includes contextual variants of
above-base marks or contextually appropriate mark-and-base ligatures.

:::{figure-md}
![Above-base substitutions](/images/gurmukhi/gurmukhi-abvs.svg "Above-base substitutions"){.shaping-demo .inline-svg .greyscale-svg #gurmukhi-abvs}

Above-base substitutions
:::

```{svg-color-toggle-button} gurmukhi-abvs
```


The `blws` feature replaces below-base-consonant glyphs with special
presentation forms. This usually includes replacing base consonants or syllable bases that
are followed by below-base-consonant forms (like those of <samp>"Ra"</samp>, <samp>"Ha"</samp>,
<samp>"Va"</samp>, or <samp>"Yakash"</samp>) with contextual ligatures.

:::{figure-md}
![Below-base substitutions](/images/gurmukhi/gurmukhi-blws.svg "Below-base substitutions"){.shaping-demo .inline-svg .greyscale-svg #gurmukhi-blws}

Below-base substitutions
:::

```{svg-color-toggle-button} gurmukhi-blws
```


The `psts` feature replaces post-base-consonant glyphs with special
presentation forms. This usually includes replacing right-side
dependent vowels (matras) with stylistic variants or replacing
post-base-consonant/matra pairs with contextual ligatures.

The `haln` feature replaces word-final <samp>"_Consonant_,Halant"</samp> pairs with
special presentation forms. This can include stylistic variants of the
consonant where placing the <samp>"Halant"</samp> mark on its own is
typographically problematic. 

:::{figure-md}
![Halant form substitutions](/images/gurmukhi/gurmukhi-haln.svg "Halant form substitutions"){.shaping-demo .inline-svg .greyscale-svg #gurmukhi-haln}

Halant form substitutions
:::

```{svg-color-toggle-button} gurmukhi-haln
```


> Note: The `calt` feature, which allows for generalized application
> of contextual alternate substitutions, is usually applied at this
> point. However, `calt` is not mandatory for correct Gurmukhi shaping
> and may be disabled in the application by user preference.

### Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied.

As with the preceding stage, the order in which these features are
applied is not canonical; they should be applied in the order in which
they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

        dist
        abvm
        blwm

> Note: The `kern` feature is usually applied at this stage, if it is
> present in the font. However, `kern` (like `calt`, above) is not
> mandatory for shaping Gurmukhi text and may be disabled by user preference.

The `dist` feature adjusts the horizontal positioning of
glyphs. Unlike `kern`, adjustments made with `dist` do not require the
application or the user to enable any software _kerning_ features, if
such features are optional. 

:::{figure-md}
![Application of the dist feature](/images/gurmukhi/gurmukhi-dist.svg "Application of the dist feature"){.shaping-demo .inline-svg .greyscale-svg #gurmukhi-dist}

Application of the dist feature
:::

```{svg-color-toggle-button} gurmukhi-dist
```


The `abvm` feature positions above-base marks for attachment to base
characters. In Gurmukhi, this includes <samp>"Reph"</samp> in addition to the
diacritical marks and Vedic signs. 

:::{figure-md}
![Above-base mark positioning](/images/gurmukhi/gurmukhi-abvm.svg "Above-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #gurmukhi-abvm}

Above-base mark positioning
:::

```{svg-color-toggle-button} gurmukhi-abvm
```


The `blwm` feature positions below-base marks for attachment to base
characters. In Gurmukhi, this includes below-base dependent vowels
(matras) as well as the below-base consonant forms of <samp>"Ra"</samp>, <samp>"Ha"</samp>, and
<samp>"Va"</samp>.

:::{figure-md}
![Below-base mark positioning](/images/gurmukhi/gurmukhi-blwm.svg "Below-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #gurmukhi-blwm}

Below-base mark positioning
:::

```{svg-color-toggle-button} gurmukhi-blwm
```


## The `<guru>` shaping model ##

The older Gurmukhi script tag, `<guru>`, has been deprecated. However,
shaping engines may still encounter fonts that were built to work with
`<guru>` and some users may still have documents that were written to
take advantage of `<guru>` shaping.

### Distinctions from `<gur2>` ###

The most significant distinction between the shaping models is that the
sequence of <samp>"Halant"</samp> and consonant glyphs used to trigger shaping
features was altered when migrating from `<guru>` to
`<gur2>`. 

Specifically, under the `<guru>` model, shaping engines were expected
to reorder post-base <samp>"Halant,_Consonant_"</samp> sequences to
<samp>"_Consonant_,Halant"</samp>.

As a result, a font's <abbr title="Glyph Substitution table">GSUB</abbr> substitutions would be written to match
<samp>"_Consonant_,Halant"</samp> sequences in all pre-base and post-base positions.


The `<guru>` syllable

	Pre-baseC Halant BaseC Halant Post-baseC

would be reordered to

	Pre-baseC Halant BaseC Post-baseC Halant

before features are applied.

In `<gur2>` text, as described above in this document, there is no
such reordering. The correct sequence to match for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions is
<samp>"_Consonant_,Halant"</samp> for pre-base consonants, but <samp>"Halant,_Consonant_"</samp>
for post-base consonants.

The old Indic shaping model also did not recognize the
`BLWF_MODE_PRE_AND_POST` shaping characteristic. Instead, `<guru>`
was treated as if it followed the `BLWF_MODE_POST_ONLY`
characteristic. In other words, below-base form substitutions were
only applied to consonants after the base consonant or syllable base.

In addition, for some scripts, left-side dependent vowel marks
(matras) were not repositioned during the final reordering
stage. For `<guru>` text, the left-side matra was always positioned
at the beginning of the syllable.


### Advice for handling fonts with `<guru>` features only ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences in order to apply <abbr title="Glyph Substitution table">GSUB</abbr> substitutions when it is known that
the font in use supports only the `<guru>` shaping model.

### Advice for handling text runs composed in `<guru>` format ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions or to reorder them to
<samp>"Halant,_Consonant_"</samp> when processing text runs that are tagged with
the `<guru>` script tag and it is known that the font in use supports
only the `<gur2>` shaping model.

Shaping engines may also choose to apply `blwf` substitutions to
below-base consonants occurring before the base consonant or syllable base when it is
known that the font in use supports an applicable substitution lookup.

Shaping engines may also choose to position left-side matras according
to the `<guru>` ordering scheme; however, doing so might interfere
with matching <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features.


================================================
FILE: opentype-shaping-hangul.md
================================================
```{include} /_global.md
```

# Hangul script shaping in OpenType #

This document details the general shaping procedure shared by all
Hangul script styles, and defines the common pieces that style-specific
implementations share. 


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Jamo type](#jamo-type)
	  - [Composing behavior](#composing-behavior)
	  - [Character tables](#character-tables)
  - [The `<hang>` shaping model](#the-hang-shaping-model)
      - [Stage 1: Identifying syllables](#stage-1-identifying-syllables)
      - [Stage 2: Determining if the syllable can be composed into a Hangul Syllables codepoint](#stage-2-determining-if-the-syllable-can-be-composed-into-a-hangul-syllables-codepoint)
      - [Stage 3: Composing the syllable (if composition is possible)](#stage-3-composing-the-syllable-if-composition-is-possible)
      - [Stage 4: Fully decomposing the syllable (if composition is not possible)](#stage-4-fully-decomposing-the-syllable-if-composition-is-not-possible)
      - [Stage 5: Shaping the fully decomposed syllable with <abbr>GSUB</abbr> features](#stage-5-shaping-the-fully-decomposed-syllable-with-gsub-features)
      - [Stage 6: Reordering tone marks](#stage-6-reordering-tone-marks)
 

## General information ##

The Hangul script is used to write Korean. It may also be referred to
as the Choseongul script or Jeongum script, and is in use in both
North Korea and South Korea as well as regions within China. It may
also be used to write the Cia-Cia language in Indonesia.

Hangul syllables are formed from individual alphabetic letters that
are arranged into square cells using pre-defined patterns. The
syllables themselves are monospaced in a run of text, using interword
spacing and punctuation.

Korean text may, in practice, incorporate Chinese characters ("Hanja")
in addition to Hangul. Hanja characters are not affected by the
shaping model for Hangul.

Modern Korean text is typically written (and, therefore, rendered)
left to right. Classical and older texts, however, may be written
vertically, top to bottom.


## Terminology ##

OpenType shaping uses a standard set of terms for elements of the
Hangul script. The terms used colloquially in any particular language
or country may vary, however, potentially causing confusion.

**Jamo** characters are the fundamental letters from which syllable
blocks are constructed.  There are three classes of jamo:

  - **L**eading consonants (choseong)
  - **V**owels (jungseong)
  - **T**railing consonants (jongseong)

Most, but not all, of the basic consonant letters can appear either in
leading or in trailing form. Nevertheless, the leading and trailing forms are
assigned distinct codepoints in Unicode. In addition, the set of valid
trailing consonants includes several compound-consonant pairs that can
never occur in leading form.

Old Korean featured a considerable number of additional jamo, which
are also defined in Unicode.  Many of these Old Korean jamo are
compound forms that concatenate two or three basic jamo. 
  
A **syllable** is formed by arranging a sequence of jamo into its
appropriate square-cell form. The horizontal and vertical positioning
of each jamo in the cell depends on the content of the syllable. The
exact shape and proportions of each jamo will also vary with its final
position in the cell. 

Valid syllables must be either of the form "**`L`**,**`V`**" or of the form
"**`L`**,**`V`**,**`T`**". That is, each syllable must begin with one leading
consonant, must include one vowel in the second position, and may or may
not end with one trailing consonant. 

:::{figure-md}
![LV syllable](images/hangul/hangul-lv-syllable.svg "LV syllable"){.shaping-demo .inline-svg .greyscale-svg #hangul-lv-syllable}

LV syllable
:::

```{svg-color-toggle-button} hangul-lv-syllable
```


:::{figure-md}
![LVT syllable](images/hangul/hangul-lvt-syllable.svg "LVT syllable"){.shaping-demo .inline-svg .greyscale-svg #hangul-lvt-syllable}

LVT syllable
:::

```{svg-color-toggle-button} hangul-lvt-syllable
```


All possible syllables for Modern Korean are defined in the Hangul
Syllables block of Unicode. A sequence of individual jamo codepoints
that corresponds to a valid Modern Korean syllable can therefore be
**composed** into a syllable codepoint. 

Sequences of codepoints that involve Old Korean jamo cannot be
composed into syllable codepoints and are handled separately by the
shaping engine.

Two tone marks are common in Old Korean, the **single-dot bangjeom**
and the **double-dot bangjeom**. Both bangjeom marks are rendered to
the left of the syllable to which they are applied.


## Glyph classification ##

Proper shaping of Hangul text runs involves determining when
sequences of jamo can be composed into syllable codepoints that are included in
the active font — in which case they should be replaced by the corresponding
syllable glyph — and when they cannot. 

Those jamo sequences that cannot be composed into a syllable codepoint
(or that compose into a syllable codepoint that is missing in the
active font) are then rendered by shaping and positioning each
individual jamo using <abbr title="Glyph Substitution table">GSUB</abbr> substitution rules. 


### Jamo type ###

Each Hangul jamo is assigned a `JAMO_TYPE` property that indicates whether
it is a leading consonant (`L`), a vowel (`V`), or a trailing
consonant (`T`).

Most, but not all, of the basic consonant letters can appear either in
leading or in trailing form. Nevertheless, the leading and trailing
forms are assigned distinct codepoints in Unicode. In addition, the
set of valid trailing consonants includes several compound consonant
pairs that can never occur in leading form.

For example, the basic consonant "Kiyeok" (&#x1100;) is encoded as `U+1100`
in its leading (choseong) form but as `U+11A8` in its trailing
(jongseong) form. The tense or emphatic form of the consonant,
"Ssangkiyeok" (&#x1101;), is encoded in its leading (choseong) form as
`U+1101` but in its trailing (jongseong) form as `U+11A9`, and is
rendered visually as a doubled version of the basic consonant.

In addition, two compound trailing consonants, "Kiyeok-sios" (&#x11aa;
`U+11AA`) and "Rieul-kiyeok" (&#x11b0; `U+11B0`), also incorporate the
Kiyeok basic consonant. But Kiyeok-sios and Rieul-kiyeok are never
used as leading consonants; therefore, they are not encoded in leading
(choseong) forms.

> Note: compound consonant jamo are not written as sequences of basic
> jamo. That is, <samp>"Kiyeok,Kiyeok"</samp> (&#x1100;&#x1100;) is not equivalent
> to <samp>"Ssangkiyeok"</samp> (&#x1101;). 

The Hangul Jamo block also includes two "filler" codepoints. "Choseong
Filler" (`U+115F`) can take the place of a missing choseong (`L`
consonant), and "Jungseong Filler" (`U+1160`) can take the place of a
missing jungseong (`V` vowel). For shaping purposes, the fillers are
classified as type `Lf` and type `Vf`, respectively.


### Composing behavior ###

Modern Korean features 19 leading consonants (`L` forms), 21 vowels
(`V` forms), and 27 trailing consonants (`T` forms). 

Old Korean featured a considerable number of additional jamo, which
are also defined in Unicode.  Some of these Old Korean jamo are
distinct basic letters that are no longer used in Modern Korean. Many
others are compound forms that concatenate two or even three basic jamo. 

The Hangul Syllables block in Unicode only includes those syllables
that contain solely Modern jamo. Consequently, each jamo is assigned a
`COMPOSING_BEHAVIOR` property to indicate whether it can be composed
into a Hangul Syllables codepoint. 

An <samp>"`L`,`V`,`T`"</samp> sequence with the `COMPOSING_BEHAVIOR`s
"`YES`,`YES`,`YES`" or an <samp>"`L`,`V`"</samp> sequence with the
`COMPOSING_BEHAVIOR`s "`YES`,`YES`" will compose to a codepoint in the Hangul
Syllables block. A sequence containing any `NO`s will not compose to a
codepoint in the Hangul Syllables block.

> Note: the jamo filler codepoints are both designated with the
> `COMPOSING_BEHAVIOR` of `NO`.


<!--- ### Identification by Unicode range ### --->


### Character tables ###


Separate character tables are provided for the Hangul Jamo, Hangul
Jamo Extended-A, and Hangul Jamo Extended-B blocks, as well as for other miscellaneous
characters that are used in `<hang>` text runs:

  - [Hangul Jamo character table](character-tables/character-tables-hangul.md#hangul-jamo-character-table)
  - [Hangul Jamo Extended-A character table](character-tables/character-tables-hangul.md#hangul-jamo-extended-a-character-table)
  - [Hangul Jamo Extended-B character table](character-tables/character-tables-hangul.md#hangul-jamo-extended-b-character-table)
  - [Miscellaneous character table](character-tables/character-tables-hangul.md#miscellaneous-character-table)


The Hangul Jamo block contains all of the Modern Korean jamo, the two
jamo fillers, and the most common Old Korean jamo. 

The Hangul Jamo Extended-A block contains additional `L` (choseong)
jamo for Old Korean. The Hangul Jamo Extended-B block contains
additional `V` (jungseong) and `T` (jongseong) jamo for Old Korean.

The Hangul Syllables block contains all of the valid permutations of the
Modern Korean jamo. Each syllable codepoint can be classified by
syllable type, either `LV` or `LVT`. These types are synonymous with
the "Hangul Syllable Type" property in Unicode. Due to the size of the
Hangul Syllables block, a full character table is not
provided. However, a
[summary](character-tables/character-tables-hangul.md#hangul-syllables-character-table)
is included to show the ranges of `LV` and `LVT` syllables.

Unicode also defines a Hangul Compatibility Jamo block that implements
backward compatibility with a retired file-encoding format. Unless a
software application is required to support specific stores of
documents that are known to have used the older encoding, however, the 
shaping engine should not be expected to handle any text runs
incorporating codepoints from this block.

The tables list each codepoint along with its Unicode general
category, its jamo type, and its composing behavior. The codepoint's
Unicode name and an example glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Jamo type | Composing | Glyph                            |
|:----------|:-----------------|:----------|:----------|:---------------------------------|
|`U+1109`   | Letter           | L         | YES       | &#x1109; Sios                    |
| | | | | |
|`U+1182`   | Letter           | V         | NO        | &#x1182; O-O                     |
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

In addition to general punctuation, runs of Hangul text may use
punctuation marks from the CJK Symbols And Punctuation block. 

Of particular note are the single-dot tone mark (single-dot bangjeom)
and double-dot tone mark (double-dot bangjeom), `U+302E` and
`U+302F`. These non-spacing marks are common in Old Korean.

Other important characters that may be encountered when shaping runs
of Hangul text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`), and zero-width non-joiner (`U+200C`).

The dotted-circle placeholder is frequently used when displaying a
mark in isolation. Real-world text may also use other characters, such
as hyphens or dashes, in a similar placeholder fashion; shaping
engines should cope with this situation gracefully.

The zero-width space (`U+200B`) or word joiner (`U+2060`) may be used
between two jamo to prevent them from being conjoined into a
syllable. The zero-width space allows a line break to happen between
the jamo, while the word joiner prevents the jamo from being separated
by a line break.


## The `<hang>` shaping model ##

Processing a run of `<hang>` text involves six top-level stages:

1. Identifying syllables
2. Determining if the syllable can be composed into a Hangul Syllables codepoint
3. Composing the syllable (if composition is possible)
4. Fully decomposing the syllable (if composition is not possible)
5. Shaping the fully decomposed syllable with <abbr title="Glyph Substitution table">GSUB</abbr> features
6. Reordering tone marks


### Stage 1: Identifying syllables ###

The precomposed syllable codepoints in the Hangul Syllables block come in
two forms: `LV` syllables (which represent an `L` jamo and a `V` jamo)
and `LVT` syllables (which represent an `L` jamo, a `V` jamo, and a `T` jamo).

A syllable consisting of a string of jamo must match either the
sequence <samp>"`L`,`V`"</samp> or the sequence <samp>"`L`,`V`,`T`"</samp>.

The `L`, `V`, and `T` components must be a single jamo each. In Modern
Korean, all of the jamo must have a `COMPOSING_BEHAVIOR` of `YES`. In
Old Korean, `YES` and `NO` are both acceptable for
`COMPOSING_BEHAVIOR`. 

However, real-world input can also include syllables entered as a
precomposed `LV` Hangul Syllable codepoint followed by a standalone
`T` jamo.

Syllables in a text run can therefore be identified with the following
regular expression:

```
    Slvt |  <Slv | <L|Lf>+<V|Vf>> + [T]
```


    L	  L jamo
	V	  V jamo
	T	  T jamo
	Lf	  L jamo filler
	Vf	  V jamo filler
	Slv	  Precomposed "LV" syllable
	Slvt	  Precomposed "LVT" syllable
	
	[ ]	  optional occurrence of the enclosed expression
	<|>	  one of the options separated by the vertical bar


The expression matches five possible syllable types:

  - `Slvt`
  - `Slv`
  - `Slv`,`T`
  - `L`,`V`
  - `L`,`V`,`T`

Sequences of jamo that do not match the above expression should be
treated as runs of standalone jamo letters.

After the syllables have been identified, each of the subsequent 
shaping stages occurs on a per-syllable basis.


### Stage 2: Determining if the syllable can be composed into a Hangul Syllables codepoint ###


#### Stage 2, step 1: Fully precomposed syllables ####

A precomposed `Slvt` or `Slv` syllable requires no shaping if the active
font includes a glyph for the corresponding Hangul Syllables
codepoint. If the glyph is present, the shaping engine can render it
and proceed directly to stage six without further work. If the glyph
is not present, the shaping engine must proceed to stage four.

The other syllable types involve jamo, and each syllable must be
examined to determine if it composes into a codepoint in the Hangul
Syllables block.


#### Stage 2, step 2: Partially precomposed syllables ####

For <samp>"`Slv`,`T`"</samp> syllables, the `Slv` codepoint must first be
decomposed into its constituent jamo. Then, the resulting
<samp>"`L`,`V`,`T`"</samp> syllable must be examined in the [next
step](#stage-2-step-3-fully-jamo-syllables). 

The decomposition of the `Slv` syllable is canonical, and uses the
algorithm defined in [stage four](#stage-4-fully-decomposing-the-syllable-if-composition-is-not-possible).


#### Stage 2, step 3: Fully jamo syllables ####

For <samp>"`L`,`V`"</samp> and <samp>"`L`,`V`,`T`"</samp> syllables, the `COMPOSING_BEHAVIOR` of
each jamo must be examined. 

If all jamo in the syllable have `COMPOSING_BEHAVIOR` of `YES`, then
the shaping engine should proceed to stage three and attempt to
compose the jamo into the corresponding Hangul Syllables codepoint.

If any of the jamo in the syllable have `COMPOSING_BEHAVIOR` of `NO`,
then the shaping engine should proceed to stage five and shape the
syllable using <abbr title="Glyph Substitution table">GSUB</abbr> features.


### Stage 3: Composing the syllable (if composition is possible) ###

Unicode defines a canonical algorithm for composing jamo into Hangul
Syllables codepoints. The algorithm leverages the strict jamo-ordering
of the syllables in the block to provide an algebraic method to
determine the codepoint of a syllable using the codepoints of its
constituent `L`, `V`, and (if needed) `T` jamo as input.

The algorithm defines the following constants:

```
	SBase = AC00
	LBase = 1100
	VBase = 1161
	TBase = 11A7
	LCount = 19
	VCount = 21
	TCount = 28
	NCount = (VCount * TCount) = 588
	SCount = (LCount * NCount) = 11172
```
	
For a jamo sequence <samp>"`L`,`V`"</samp>, where both `L` and `V` are of
`COMPOSING_BEHAVIOR` `YES`, the composed syllable codepoint is found
by computing:

```
	LIndex = L - LBase
	VIndex = V - VBase
	LVIndex = LIndex * NCount + VIndex * TCount
	Slv = SBase + LVIndex
```

Similarly, for a jamo sequence <samp>"`L`,`V`,`T`"</samp>, where `L`, `V`, and `T`
are all of `COMPOSING_BEHAVIOR` `YES`, the composed syllable codepoint
is found by computing:

```
	LIndex = L - LBase
	VIndex = V - VBase
	TIndex = T - TBase
	LVIndex = LIndex * NCount + VIndex * TCount
	Slvt = SBase + LVIndex + TIndex
```

After the syllable codepoint has been found, the shaping engine must
verify that the codepoint's glyph exists in the active font. If the
glyph is present, the shaping engine must substitute the input jamo
sequence with the glyph. The shaping engine can then proceed to stage
six. 

If the glyph for the needed codepoint is missing from the active font,
the shaping engine should perform no substitution and must proceed to
stage five with the original `L`, `V`, and (if used) `T` jamo. 

:::{figure-md}
![Syllable composition](images/hangul/hangul-compose.svg "Syllable composition"){.shaping-demo .inline-svg .greyscale-svg #hangul-compose}

Syllable composition
:::

```{svg-color-toggle-button} hangul-compose
```


### Stage 4: Fully decomposing the syllable (if composition is not possible) ###

An <samp>"`Slv`,`T`"</samp> syllable that does not compose into a Hangul Syllables
codepoint or that composes into a Hangul Syllables codepoint which is
missing in the active font must be fully decomposed into jamo.

Similarly, a precomposed `Slvt` or `Slv` syllable requires no shaping
if the active font includes a glyph for the corresponding Hangul
Syllables codepoint. If the corresponding codepoint is missing in the
active font, however, the syllable must be fully decomposed into jamo.

Unicode defines a canonical algorithm for decomposing Hangul Syllables
codepoints into constituent jamo. The algorithm leverages the strict
jamo-ordering of the syllables in the block to provide an algebraic method to
determine the codepoints of a syllable's `L`, `V`, and (if needed) `T`
jamo from the syllable's codepoint.

The algorithm defines the following constants:

```
	SBase = AC00
	LBase = 1100
	VBase = 1161
	TBase = 11A7
	LCount = 19
	VCount = 21
	TCount = 28
	NCount = (VCount * TCount) = 588
	SCount = (LCount * NCount) = 11172
```
	
For a syllable codepoint `S`, the codepoints of the constituent `L`,
`V`, and `T` jamo are found by computing:

```
	SIndex = S - SBase
	LIndex = SIndex div NCount
	VIndex = (SIndex mod NCount) div TCount
	TIndex = SIndex mod TCount
	L = LBase + LIndex
	V = VBase + VIndex
	T = TBase + TIndex if TIndex > 0
```

If `TIndex` = 0, then the syllable has no `T` jamo in the
trailing-consonant (jongseong) position.

With the syllable decomposed, the shaping engine can proceed to stage
five with the `L`, `V`, and (if used) `T` jamo. 

:::{figure-md}
![Syllable decomposition](images/hangul/hangul-decompose.svg "Syllable decomposition"){.shaping-demo .inline-svg .greyscale-svg #hangul-decompose}

Syllable decomposition
:::

```{svg-color-toggle-button} hangul-decompose
```


### Stage 5: Shaping the fully decomposed syllable with <abbr>GSUB</abbr> features ###

With the syllable fully decomposed into a sequence of jamo, the next
stage applies mandatory substitution features using rules in the
font's <abbr title="Glyph Substitution table">GSUB</abbr> table. 


#### Stage 5, step 1: `ccmp` ####

The `ccmp` feature allows a font to substitute basic-jamo sequences
with a pre-composed glyph including compound jamo. 
 
If present, these composition and decomposition substitutions must be
performed before applying any other <abbr title="Glyph Substitution table">GSUB</abbr> lookups, because
those lookups may be written to match only the `ccmp`-substituted
glyphs. 


#### Stage 5, step 2: `ljmo` ####

This feature replaces the default (i.e., standalone) forms of leading
consonant (choseong) glyphs in a syllable cell with alternate forms
that fit into syllable-appropriate positions.

The appropriate shape of the choseong glyph depends on the shape of
the vowel (jungseong) that follows. For example, a tall jungseong forces
the usage of a tall choseong form.

In addition, if the syllable ends in a trailing consonant (jongseong),
then shorter forms of both the leading consonant (choseong) and vowel
(jungseong) glyphs will be used in order to provide sufficient
vertical space. 

:::{figure-md}
![L Jamo feature application](images/hangul/hangul-ljmo.svg "L Jamo feature application"){.shaping-demo .inline-svg .greyscale-svg #hangul-ljmo}

L Jamo feature application
:::

```{svg-color-toggle-button} hangul-ljmo
```


#### Stage 5, step 3: `vjmo` ####

This feature replaces the default (i.e., standalone) forms of vowel
(jungseong) glyphs in a syllable cell with alternate forms that fit into
syllable-appropriate positions.

The appropriate shape of the jungseong glyph depends on the presence
or absence of a trailing consonant (jongseong) at the end of the syllable.

If the syllable ends in a trailing consonant (jongseong), then shorter
forms of both the leading consonant (choseong) and vowel (jungseong)
glyphs will be used in order to provide sufficient vertical space.

:::{figure-md}
![V Jamo feature application](images/hangul/hangul-vjmo.svg "V Jamo feature application"){.shaping-demo .inline-svg .greyscale-svg #hangul-vjmo}

V Jamo feature application
:::

```{svg-color-toggle-button} hangul-vjmo
```


#### Stage 5, step 4: `tjmo` ####

This feature replaces the default (i.e., standalone) forms of trailing
consonant (jongseong) glyphs in a syllable cell with alternate forms
that fit into syllable-appropriate positions.

Because jongseong jamo are always preceded by a choseong jamo and a
jungseong jamo, there is less variation in shape that the alternate
forms can take on. A given font may, however, include several
context-dependent alternates for stylistic or typographic variation.

:::{figure-md}
![T Jamo feature application](images/hangul/hangul-tjmo.svg "T Jamo feature application"){.shaping-demo .inline-svg .greyscale-svg #hangul-tjmo}

T Jamo feature application
:::

```{svg-color-toggle-button} hangul-tjmo
```


### Stage 6. Reordering tone marks ###

Any tone marks should now be reordered. In the text run, marks occur immediately after
the syllable to which they apply. After reordering, each mark should
be placed immediately to the left of the syllable.

This reordering move is the same regardless of whether the syllable in
question is a precomposed syllable codepoint from the Hangul Syllables
block or a jamo-based syllable composed via the application of <abbr title="Glyph Substitution table">GSUB</abbr>
features. Therefore, the reordering must take place at the end of the
shaping process.

:::{figure-md}
![Tone-mark reordering](images/hangul/hangul-tone.svg "Tone-mark reordering"){.shaping-demo .inline-svg .greyscale-svg #hangul-tone}

Tone-mark reordering
:::

```{svg-color-toggle-button} hangul-tone
```


================================================
FILE: opentype-shaping-hebrew.md
================================================
```{include} /_global.md
```

# Hebrew script shaping in OpenType #

This document details the general shaping procedure shared by all
Hebrew script styles, and defines the common pieces that style-specific
implementations share. 


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
	  - [Mark classification](#mark-classification)
	  - [Character tables](#character-tables)
  - [The `<hebr>` shaping model](#the-hebr-shaping-model)
      - [Stage 1: Compound character composition and decomposition](#stage-1-compound-character-composition-and-decomposition)
      - [Stage 2: Composing any Alphabetic Presentation forms](#stage-2-composing-any-alphabetic-presentation-forms)
      - [Stage 3: Applying the language-form substitution features from <abbr>GSUB</abbr>](#stage-3-applying-the-language-form-substitution-features-from-gsub)
      - [Stage 4: Applying the typographic-form substitution features from <abbr>GSUB</abbr>](#stage-4-applying-the-typographic-form-substitution-features-from-gsub)
      - [Stage 5: Applying the positioning features from <abbr>GPOS</abbr>](#stage-5-applying-the-positioning-features-from-gpos)
  

## General information ##

The Hebrew script is used to write multiple languages, including
Hebrew, Yiddish, and Judezmo. 

Hebrew is written (and, therefore, rendered) from right to
left. Shaping engines must track the directionality of the text run
when scripts of different direction are mixed.

The Hebrew script tag defined in OpenType is `<hebr>`. Apart from the
fact that Hebrew uses right-to-left directionality, the shaping
process for `<hebr>` is identical to the default script-shaping
model.

> Note: The Letterlike Symbols block in Unicode includes four
> codepoints corresponding to mathematical symbols based on Hebrew
> letters. 
>
> These codepoints are not expected to occur within valid Hebrew text
> runs. In addition, because these codepoints are defined for usage in 
> mathematical expressions, they are designated as using left-to-right
> directionality.


## Terminology ##

OpenType shaping uses a standard set of terms for elements of the
Hebrew script. The terms used colloquially in any particular language
may vary, however, potentially causing confusion.

**Base** glyph or character is the standard term for a Hebrew
character that is capable of taking a diacritical mark. 
 
Most of the base characters in Hebrew are consonants, although some
base characters are used to represent vowels in certain contexts.

Vowels that are not represented with base characters are frequently
omitted from the text run entirely. Alternatively, such vowels may
appear as marks called **niqqud**. Niqqud are also referred to as
**points** in the Unicode standard.

Pronunciation marks, such as the dot used to distinguish "Shin" from
"Sin", are also considered **niqqud**. Niqqud are typically positioned
above or below the base character.

**Dagesh** is the term for a particular diacritic that alters the
pronunciation of a consonant. The dagesh is distinctive for being
positioned inside the consonant glyph. Other Hebrew diacritics are
positioned either above or below the base character.

Hebrew also includes a sizable set of **cantillation marks**, in
addition to vowel, diacritical, and pronunciation marks. Cantillation
marks are also referred to as **tropes**.


## Glyph classification ##

Because `<hebr>` text runs do not involve reordering or syllable
identification, Hebrew base characters do not require further
classification for script-shaping purposes.

Five Hebrew letters have special word-final forms. Each of these is
encoded separately in the Hebrew block. They are regarded as
contextual variants, not as distinct letters. The Hebrew block also
includes several digraphs that are used only when writing the Yiddish
languages. 

Because these word-final forms and digraphs are separately encoded,
fonts do not implement <abbr title="Glyph Substitution table">GSUB</abbr> substitutions to provide access to them.


### Mark classification ###

Because Hebrew text may include several types of mark (vowel niqqud,
cantillation marks, pronunciation marks) positioned on a base
character, sequences of adjacent marks may need to be reordered.

The Unicode standard defines a _canonical combining class_ for each
codepoint that is used whenever a sequence needs to be sorted into
canonical order. 

Hebrew marks all belong to standard combining classes. Most, but not
all, cantillation marks are assigned to the generic below-base (220)
or above-base (230) combining classes. Niqqud are assigned to distinct
combining classes designed to enforce orthographically correct
ordering:

:::{table} Mark-classification table

| Codepoint | Combining class | Glyph                              |
|:----------|:----------------|:-----------------------------------|
| `U+0591`  | 220             | &#x0591; Etnahta                   |
| `U+0592`  | 230             | &#x0592; Segol                     |
| `U+05B0`  | 10              | &#x05B0; Sheva                     |
| `U+05B2`  | 12              | &#x05B2; Hataf Patah               |
| `U+05B9`  | 19              | &#x05B9; Holam                     |
| `U+05BF`  | 23              | &#x05BF; Rafe                      |
:::


The numeric values of these combining classes are used during Unicode
normalization.


### Character tables ###

The Hebrew block in Unicode contains the codepoints required to
represent text in all languages written using Hebrew.

The Alphabetic Presentation Forms block in Unicode includes 46
additional codepoints for Hebrew. Included are several precomposed
combinations of base characters and marks and the "Alef Lamed"
ligature, any of which may occur in `<hebr>` text runs. Glyphs for
these presentation forms may be provided by fonts that do not
implement the corresponding mark-to-base and ligature features in
OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> tables.

The Alphabetic Presentation Forms block also includes a set of eight
"wide" variants of standard Hebrew characters (`U+FB21` through
`U+FB28`) and a variant form of "Ayin" (`U+FB20`), for backwards
compatibility with retired file-encoding standards. New usage of these
codepoints is not recommended and they are unlikely to occur in
contemporary documents. 

Consequently, unless a software application is required to support
specific stores of documents that are known to have used these older
encodings, the shaping engine should not be expected to handle any
text runs incorporating these backwards-compatibility variant
codepoints.

Separate character tables are provided for the Hebrew block, the
Hebrew letters included in the Alphabetic Presentation Forms block,
and for other miscellaneous characters that are used in `<hebr>` text
runs:

  - [Hebrew character table](character-tables/character-tables-hebrew.md#hebrew-character-table)
  - [Alphabetic Presentation Forms (Hebrew) character table](character-tables/character-tables-hebrew.md#alphabetic-presentation-forms-character-table)
  - [Miscellaneous character table](character-tables/character-tables-hebrew.md#miscellaneous-character-table)


The tables list each codepoint along with its Unicode general
category. For marks, the table lists the codepoint's mark combining
class. The codepoint's Unicode name and an example glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Mark class | Glyph                        |
|:----------|:-----------------|:-----------|:-----------------------------|
|`U+05D0`   | Letter           | _0_        | &#x05D0; Alef                |
| | | | |
|`U+05C1`   | Mark [Mn]        | 24         | &#x05C1; Point Shin Dot      |
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 


<!--- Character table example and explanation --->

Other important characters that may be encountered when shaping runs
of Hebrew text include the dotted-circle placeholder (`U+25CC`), the
combining grapheme joiner (`U+034F`), the zero-width joiner (`U+200D`)
and zero-width non-joiner (`U+200C`), the left-to-right text marker
(`U+200E`) and right-to-left text marker (`U+200F`), and the no-break
space (`U+00A0`).

The dotted-circle placeholder is frequently used when displaying a
vowel or diacritical mark in isolation. Real-world text documents may
also use other characters, such as hyphens or dashes, in a similar
placeholder fashion; shaping engines should cope with this situation
gracefully.

The combining grapheme joiner (<abbr>CGJ</abbr>), zero-width joiner (<abbr>ZWJ</abbr>), and
zero-width non-joiner (<abbr>ZWNJ</abbr>) may be used to alter the
order in which adjacent marks are positioned during the
mark-reordering stage, in order to adhere to the needs of a
non-default language orthography.
<!--- combining grapheme joiner explanation --->


<!--- Zero-Width Non Joiner explanation --->

The right-to-left mark (<abbr>RLM</abbr>) and left-to-right mark (<abbr>LRM</abbr>) are used by
the Unicode bidirectionality algorithm (BiDi) to indicate the points
in a text run at which the writing direction changes.


<!--- How shaping is affected by the <abbr title="Left-To-Right">LTR</abbr> and <abbr title="Right-To-Left">RTL</abbr> markers explanation --->


The no-break space may be used to display those codepoints that
are defined as non-spacing (such as niqqud or cantillation marks) in an
isolated context, as an alternative to displaying them superimposed on
the dotted-circle placeholder.


## The `<hebr>` shaping model ##

Processing a run of `<hebr>` text involves five top-level stages:

1. Compound character composition and decomposition
2. Composing any Alphabetic Presentation forms
3. Applying the language-form substitution features from <abbr>GSUB</abbr>
4. Applying the typographic-form substitution features from <abbr>GSUB</abbr>
5. Applying the positioning features from <abbr>GPOS</abbr>


### Stage 1: Compound character composition and decomposition ###

In this stage, the `ccmp` feature from <abbr title="Glyph Substitution table">GSUB</abbr> is applied and the
resulting sequence of codepoints should be checked for correct mark
order.

> Note: Shaping engines may have already applied Unicode normalization
> to compose or decompose codepoints before beginning the shaping
> process. Due to the Alphabetic Presentation Forms composition in
> stage two, however, the `ccmp` feature and any necessary mark
> reordering must be performed here, as Alphabetic Presentation Forms
> are not handled by Unicode normalization.


#### Stage 1, step 1: ccmp

The `ccmp` feature allows a font to substitute

 - mark-and-base sequences with a pre-composed glyph including both
   the mark and the base (as is done with a ligature substitution)
 - individual compound glyphs with the equivalent sequence of
   decomposed glyphs
 
If present, these composition and decomposition substitutions must be
performed before applying any other <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups, because
those lookups may be written to match only the `ccmp`-substituted
glyphs. 

:::{figure-md}
![ccmp composition](/images/hebrew/hebrew-ccmp.svg "ccmp composition"){.shaping-demo .inline-svg .greyscale-svg #hebrew-ccmp}

ccmp composition
:::

```{svg-color-toggle-button} hebrew-ccmp
```


#### Stage 1, step 2: Mark reordering

Sequences of adjacent marks must be reordered so that they appear in
canonical order before the mark-to-base and mark-to-mark positioning
features from <abbr title="Glyph Positioning table">GPOS</abbr> can be correctly applied.

For `<hebr>` text runs, normalizing the sequence of marks using the
Unicode _canonical combining class_ of each mark should be sufficient.


### Stage 2: Composing any Alphabetic Presentation forms ###

If the active font includes glyphs for precomposed mark-and-base
codepoints from the Alphabetic Presentation Forms block, these
precomposed glyphs should be preferred over sequences of individual
base glyphs and marks positioned with <abbr title="Glyph Positioning table">GPOS</abbr>.

The codepoints in question are not included in the canonical Unicode
compositions, so the shaping engine should substitute them at this
stage, before proceeding with the shaping process.

The individual base and mark sequences that should compose to each
precomposed Hebrew mark-and-base codepoint in the Alphabetic
Presentation Forms block are listed in the _Composition_ column of the
[Alphabetic Presentation Forms character
table](character-tables/character-tables-hebrew.md#alphabetic-presentation-forms-character-table). 

For example: 

:::{table} Example character table for Alphabetic Presentation forms

| Codepoint | Unicode category | Mark class | Composition     | Glyph                                   |
|:----------|:-----------------|:-----------|:----------------|:----------------------------------------|
| `U+FB1D`  | Letter           | _0_        |`U+05D9`,`U+05B4`| &#xFB1D; Yod With Hiriq                 |
| | | | | |
| `U+FB2B`  | Letter           | _0_        |`U+05E9`,`U+05C2`| &#xFB2B; Shin With Sin Dot              |
:::


Two of the precomposed glyphs, "Shin With Dagesh And Shin Dot"
(`U+FB2C`) and "Shin With Dagesh And Sin Dot" (`U+FB2D`), have
multiple possible composing sequences. All of the other precomposed
glyphs in the block have a single composing sequence.

> Note: the active font may implement these compositions in a `ccmp`
> lookup in <abbr title="Glyph Substitution table">GSUB</abbr>, in which case this stage will involve no additional work.

:::{figure-md}
![Alphabetic Presentation forms composition](/images/hebrew/hebrew-apf.svg "Alphabetic Presentation forms composition"){.shaping-demo .inline-svg .greyscale-svg #hebrew-apf}

Alphabetic Presentation forms composition
:::

```{svg-color-toggle-button} hebrew-apf
```


### Stage 3: Applying the language-form substitution features from <abbr>GSUB</abbr> ###

The language-substitution phase applies mandatory substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for
this stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

The order in which these substitutions must be performed is fixed for
all scripts implemented in the Hebrew shaping model:

	locl
	

#### Stage 3, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.


### Stage 4: Applying the typographic-form substitution features from <abbr>GSUB</abbr> ###

The typographic-substitution phase applies optional substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table.

The order in which these substitutions must be performed is fixed for
all scripts implemented in the Hebrew shaping model:

	liga
	dlig
	

#### Stage 4, step 1: liga ####

The `liga` feature substitutes standard, optional ligatures that are on
by default. Substitutions made by `liga` may be disabled by
application-level user interfaces.

:::{figure-md}
![Standard ligature substitution](/images/hebrew/hebrew-liga.svg "Standard ligature substitution"){.shaping-demo .inline-svg .greyscale-svg #hebrew-liga}

Standard ligature substitution
:::

```{svg-color-toggle-button} hebrew-liga
```


#### Stage 4, step 2: dlig ####

The `dlig` feature substitutes additional optional ligatures that are
off by default. Substitutions made by `dlig` may be enabled by
application-level user interfaces.

:::{figure-md}
![Discretionary ligature substitution](/images/hebrew/hebrew-dlig.svg "Discretionary ligature substitution"){.shaping-demo .inline-svg .greyscale-svg #hebrew-dlig}

Discretionary ligature substitution
:::

```{svg-color-toggle-button} hebrew-dlig
```


### Stage 5: Applying the positioning features from <abbr>GPOS</abbr> ###

The positioning stage adjusts the positions of mark and base
glyphs.

The order in which these features are applied is fixed for
the Hebrew shaping model:

	kern
	mark


#### Stage 5, step 1: `kern` ####

The `kern` feature adjusts glyph spacing between pairs of adjacent glyphs.


:::{figure-md}
![Kerning application](/images/hebrew/hebrew-kern.svg "Kerning application"){.shaping-demo .inline-svg .greyscale-svg #hebrew-kern}

Kerning application
:::

```{svg-color-toggle-button} hebrew-kern
```

#### Stage 5, step 2: `mark` ####

The `mark` feature positions marks with respect to base glyphs.

:::{figure-md}
![Mark positioning](/images/hebrew/hebrew-mark.svg "Mark positioning"){.shaping-demo .inline-svg .greyscale-svg #hebrew-mark}

Mark positioning
:::

```{svg-color-toggle-button} hebrew-mark
```


================================================
FILE: opentype-shaping-indic-general.md
================================================
# Indic script shaping in OpenType #

This document outlines the general shaping procedure shared by all
Indic scripts, and defines the common pieces that script-specific
implementations share. 


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes](#shaping-classes)
	  - [Mark-placement subclasses](#mark-placement-subclasses)
      - [Character tables](#character-tables)
  - [The Indic2 shaping model](#the-indic2-shaping-model)
      - [Sort ordering](#sort-ordering)
      - [Script shaping characteristics](#script-shaping-characteristics)
      - [Stage 1: Identifying syllables and other sequences](#stage-1-identifying-syllables-and-other-sequences)
      - [Stage 2: Initial reordering](#stage-2-initial-reordering)
      - [Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr>](#stage-3-applying-the-basic-substitution-features-from-gsub)
      - [Stage 4: Final reordering](#stage-4-final-reordering)
      - [Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr>](#stage-5-applying-all-remaining-substitution-features-from-gsub)
      - [Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr>](#stage-6-applying-remaining-positioning-features-from-gpos)
  - [The old Indic shaping model](#the-old-indic-shaping-model)


## General information ##

The Indic family of scripts includes writing systems
derived from the Brahmi script in ancient India. Although the scripts
vary considerably in appearance, their shared ancestry means that they
also share a number of important features and rules. 

This makes it possible (though, of course, not mandatory) for a
shaping engine to implement a single shaping model that covers all of
the scripts. 

The largest (by number of readers) scripts in the Indic family are:

  - [Devanagari](opentype-shaping-devanagari.md)
  - [Bengali](opentype-shaping-bengali.md)
  - [Gujarati](opentype-shaping-gujarati.md)
  - [Gurmukhi](opentype-shaping-gurmukhi.md)
  - [Kannada](opentype-shaping-kannada.md)
  - [Malayalam](opentype-shaping-malayalam.md)
  - [Oriya](opentype-shaping-oriya.md)
  - [Tamil](opentype-shaping-tamil.md)
  - [Telugu](opentype-shaping-telugu.md)
  - [Sinhala](opentype-shaping-sinhala.md)

Text runs in Indic scripts may also include characters from the Vedic
Extensions block in Unicode. This is a set of marks and punctuation
needed to accurately transcribe ancient documents in Sanskrit.

Text runs in Indic scripts also make use of joiner, non-joiner, and
placeholder characters from other Unicode blocks, in order to specify
certain alternate shaping options.

There are two sets of Indic script tags defined in OpenType. Several
from the older set (`<deva>`, `<beng>`, `<gujr>`, `<guru>`, `<knda>`,
`<mlym>`, `<orya>`, `<taml>`, and `<telu>`) were deprecated and
replaced in 2005.

The new set of replacement tags for these scripts (`<dev2>`, `<bng2>`,
`<gjr2>`, `<gur2>`, `<knd2>`, `<mlm2>`, `<ory2>`, `<tml2>`, and
`<tel2>`) was devised to overcome shortcomings found in the original model. 
Therefore, new fonts should be engineered to work with the updated
shaping model. However, if a font is encountered that supports only
an older script tag, the shaping engine should deal with it gracefully.

The `<sinh>` tag, unlike the other Indic script tags,
was not deprecated in 2005 and is still used for Sinhala text.

> Note: There are several other scripts derived from the Brahmi script
> that are often treated separately and not bundled into the "Indic"
> category by shaping engines. This is because these other scripts
> evolved to have significantly distinct rules for syllable
> construction, reordering, and shaping.
>
> The scripts include Buginese, Balinese, Javanese,
> [Khmer](opentype-shaping-khmer.md),
> [Lao](opentype-shaping-thai-lao.md),
> [Myanmar](opentype-shaping-myanmar.md),
> [Thai](opentype-shaping-thai-lao.md), and
> [Tibetan](opentype-shaping-tibetan.md). 

## Terminology ##

OpenType shaping uses a standard set of terms for Indic scripts.  The
terms used colloquially in any particular language may vary, however,
potentially causing confusion.

**Matra** is the standard term for a dependent vowel sign. 

The term "matra" is also used to refer to the headline above letters
in scripts like Devanagari, Bengali, and Gurmukhi. To avoid ambiguity,
the term **headline** is used in most Unicode and OpenType shaping
documents.

**Halant** and **Virama** are both standard terms for the below-base
"vowel-killer" sign. Unicode documents use the term "virama" most
frequently, while OpenType documents use the term "halant" most
frequently.

**Chandrabindu** (or simply **Bindu**) is the standard term for the
diacritical mark indicating that the preceding vowel should be
nasalized. 

The term **base consonant** is also critical to Indic shaping. The
base consonant of a syllable is the consonant that carries the
syllable's vowel sound, either the inherent vowel (for an unmarked
base consonant) or a dependent vowel (with the addition of a matra).

A syllable's base consonant is generally rendered in its full form
(although it may form ligatures), while other consonants in the
syllable frequently take on secondary forms. Different <abbr title="Glyph Substitution table">GSUB</abbr>
substitutions may apply to a script's **pre-base** and **post-base**
consonants. Some of these substitutions create **above-base** or
**below-base** forms. The **Reph** form of the consonant "Ra" is an
example.

Syllables may also begin with an **independent vowel** instead of a
consonant. In these syllables, the independent vowel is rendered in
full-letter form, not as a matra, and the independent vowel serves as the
syllable base, similar to a base consonant.

Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.


## Glyph classification ##

Shaping Indic text depends on the shaping engine correctly classifying
each glyph in the run. The classifications must distinguish between
consonants, vowels (independent and dependent), numerals, punctuation,
and various types of diacritical mark. 

For most codepoints, the `General Category` property defined in the
Unicode standard is correct, but it is not sufficient to fully capture
the expected shaping behavior (such as glyph reordering). Therefore,
Indic glyphs must additionally be classified by how they are treated
when shaping a run of text.

### Shaping classes ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Indic scripts. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Several of the diacritic and syllable-modifying marks behave according
to their own rules and, thus, have a special class. These include
`BINDU`, `VISARGA`, `AVAGRAHA`, `NUKTA`, and `VIRAMA`. Some
less-common marks behave according to rules that are similar to these
common marks, and are therefore classified with the corresponding
common mark. 

Less common mark classes include `TONE_MARKER`, `CANTILLATION`,
`GEMINATION_MARK`, `PURE_KILLER`,  and `SYLLABLE_MODIFIER`. An
explanation of each class is included in the shaping documentation of
each script in which the class occurs.

Letters generally fall into the classes `CONSONANT`,
`VOWEL_INDEPENDENT`, and `VOWEL_DEPENDENT`. These classes help the
shaping engine parse and identify key positions in a syllable. For
example, Unicode categorizes dependent vowels as `Mark [Mn]`, but the
shaping engine must be able to distinguish between dependent vowels
and diacritical marks (some of which are also categorized as `Mark [Mn]`).

There are several subclasses of consonants that arise on occasion, such as
`CONSONANT_DEAD`, `CONSONANT_MEDIAL`, `CONSONANT_PLACEHOLDER`,
`CONSONANT_WITH_STACKER`, and `CONSONANT_PRE_REPHA`. 

These subclasses indicate that the letter should match simple
tests for consonants (as in the regular expressions used during
syllable identification), but the subclass may factor into
script-specific rules encountered in later shaping stages.

For example, `CONSONANT_DEAD` indicates that, unlike standard
consonants, the dead consonant carries no inherent vowel. This lack of
an inherent vowel means that the letter is likely not accompanied by a
`VIRAMA`; failure to recognize this distinction could trick a naive
parser into mis-identifying the letter as the base consonant of a
syllable during the base-consonant-identification step. 

Not every script features an instance of each consonant subclass. A
full explanation of each subclass's behavior is provided in the
relevant stage of each script's shaping documentation.


Other characters, such as symbols and miscellaneous letters (for
example, letter-like symbols that only occur as standalone entities
and do not occur within syllables), need no special attention from the
shaping engine, so they are not assigned a shaping class.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType
features that might affect how the respective glyphs are drawn, such
as `tnum`, which specifies the usage of tabular-width numerals, and
`sups`, which replaces the default glyphs with superscript variants.

### Mark-placement subclasses ###

Marks and dependent vowels are further labeled with a mark-placement
subclass, which indicates where the glyph will be placed with respect
to the base character to which it is attached. 

The actual attachment position of these glyphs is determined by the
lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr> table. However, the reordering rules for
Indic scripts require that the shaping engine be able to identify
marks by their general position. 

For example, left-side dependent vowels (matras), classified
with `LEFT_POSITION`, must frequently be reordered, with the final
position determined by whether or not other letters in the syllable
have formed ligatures or combined into conjunct forms. Therefore, the
`LEFT_POSITION` subclass of the character must be tracked throughout
the shaping process.

There are four basic _mark-placement subclasses_ for dependent vowels
(matras). Each corresponds to the visual position of the matra with
respect to the syllable base to which it is attached:

  - `LEFT_POSITION` matras are positioned to the left of the syllable base.
  - `RIGHT_POSITION` matras are positioned to the right of the syllable base.
  - `TOP_POSITION` matras are positioned above the syllable base.
  - `BOTTOM_POSITION` matras are positioned below the syllable base.
  
These positions may also be referred to elsewhere in shaping documents as:

  - _Pre-base_ matras
  - _Post-base_ matras
  - _Above-base_ matras
  - _Below-base_ matras
  
respectively. The `LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

In addition, dependent-vowel codepoints that are composed of multiple
components will be designated in character tables as having a compound
_mark-placement subclass_, such as `TOP_AND_RIGHT` or `LEFT_AND_RIGHT`. 

However, these multi-part matras are decomposed into separate matra
components during the shaping process. After the decomposition, each
matra component will belong to exactly one of the four basic
_mark-placement subclasses_.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there are some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 

### Character tables ###

Character tables for all of the scripts, plus the Vedic Extensions and
important miscellaneous characters, are available here:

  - [Devanagari](character-tables/character-tables-devanagari.md) (Including Devanagari Extended)
  - [Bengali](character-tables/character-tables-bengali.md)
  - [Gujarati](character-tables/character-tables-gujarati.md)
  - [Gurmukhi](character-tables/character-tables-gurmukhi.md)
  - [Kannada](character-tables/character-tables-kannada.md)
  - [Malayalam](character-tables/character-tables-malayalam.md)
  - [Oriya](character-tables/character-tables-oriya.md)
  - [Tamil](character-tables/character-tables-tamil.md)
  - [Telugu](character-tables/character-tables-telugu.md)
  - [Sinhala](character-tables/character-tables-sinhala.md)


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of Indic-script text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (such as <samp>"Reph"</samp>)


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a <samp>"_Consonant_,Halant,_Consonant_"</samp> sequence. 

  - The sequence <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> blocks the
    formation of a conjunct between the two consonants. 

Note, however, that the <samp>"_Consonant_,Halant"</samp> subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead.

  - The sequence <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> should produce
    the first consonant in its standard form, followed by an explicit
    <samp>"Halant"</samp>.

A secondary usage of the zero-width joiner is to prevent the formation of
<samp>"Reph"</samp> in those scripts that use an implicit sequence to request a
<samp>"Reph"</samp> form.

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence should not produce a <samp>"Reph"</samp>,
    even where an initial <samp>"Ra,Halant"</samp> sequence without the zero-width
    joiner would otherwise produce a <samp>"Reph"</samp>.

> Note: this particular usage of <abbr title="Zero-Width Joiner">ZWJ</abbr> may not apply to scripts that
> feature an explicit <samp>"Reph"</samp> codepoint or an explicit sequence for
> requesting <samp>"Reph"</samp>. See the script-specific shaping documents for
> full details.

The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.

For example:

  - A lookup that substitutes an alternate version of a
    dependent-vowel (matra) glyph when it is preceded by <samp>"Ka,Halant,Tta"</samp>
    should still be applied if the dependent-vowel codepoint is preceded
    by <samp>"Ka,Halant,ZWJ,Tta"</samp> in the text run.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match <samp>"NBSP,ZWJ,Halant,_Consonant_"</samp>, <samp>"NBSP,_mark_"</samp>, or <samp>"NBSP,_matra_"</samp>.

In addition to general punctuation, runs of text in several of the
supported scripts often use the danda (`U+0964`) and double danda
(`U+0965`) punctuation marks from the Devanagari block.

	
## The Indic2 shaping model ##

Processing a run of text in any of the modern Indic script tags
involves six top-level stages: 

1. Identifying syllables and other sequences
2. Initial reordering
3. Applying the basic substitution features from <abbr>GSUB</abbr>
4. Final reordering
5. Applying all remaining substitution features from <abbr>GSUB</abbr>
6. Applying all remaining positioning features from <abbr>GPOS</abbr>


The initial reordering and final reordering stages each involve a set
of script-specific rules that dictate how characters are reordered
from their sequence in the input stream into the correct ordering for
shaping rules to apply.

Specifically, certain consonants in each script are repositioned from
their logical position (that is, their position in the input
stream). The most common example is <samp>"Ra"</samp>, which is frequently
converted into a combining mark-like form. 

The resulting mark must be correctly positioned by attaching it to the
correct base character using the active font's `mark` lookup from
<abbr title="Glyph Positioning table">GPOS</abbr>. Therefore, the mark form of the <samp>"Ra"</samp> must be moved so that it
is adjacent to the correct base character. Which character in a
syllable is the correct base character differs from script to script,
and may involve several context-sensitive tests.

Similarly, certain other consonants in each script also take on
distinct forms that require reordering so that `mark` positioning
and other lookups function correctly. Dependent vowels (matras) may
also need to be reordered so that they are adjacent to the correct
consonant. These functions, too, involve script-specific rule sets.

Because of the script-specific rules involved, it is mandatory that
the basic substitution features in stage three be applied in the order
specified. 

The remaining substitution features in stage five and the positioning
features in stage six, however, do not have a mandatory order.

### Sort ordering ###

A single, canonical sequence of ordering positions exists that
captures all of the possible positions in an Indic syllable. 

Not every position is used in every script and not every syllable will
contain a character in every position. Whenever characters in a
syllable are reordered during the shaping process, they are sorted
into the following sequence of ordering positions:

	POS_RA_TO_BECOME_REPH
	POS_PREBASE_MATRA
	POS_PREBASE_CONSONANT

	POS_SYLLABLE_BASE
	POS_AFTER_MAIN

	POS_ABOVEBASE_CONSONANT

	POS_BEFORE_SUBJOINED
	POS_BELOWBASE_CONSONANT
	POS_AFTER_SUBJOINED

	POS_BEFORE_POST
	POS_POSTBASE_CONSONANT
	POS_AFTER_POST

	POS_FINAL_CONSONANT
	POS_SMVD

Not every position is used in every script; the sequence merely
describes all of the possible positions at which a character in an
Indic syllable can exist. Using the same sequence for all scripts
could reduce an implementation's code size and complexity.

The basic positions (left to right) are <samp>"Reph"</samp> (`POS_RA_TO_BECOME_REPH`), dependent
vowels (matras) and consonants positioned before the base
consonant or syllable base (`POS_PREBASE_MATRA` and `POS_PREBASE_CONSONANT`), the base
consonant or syllable base (`POS_SYLLABLE_BASE`), above-base consonants
(`POS_ABOVEBASE_CONSONANT`), below-base consonants
(`POS_BELOWBASE_CONSONANT`), consonants positioned after the base consonant or syllable base
(`POS_POSTBASE_CONSONANT`), syllable-final consonants (`POS_FINAL_CONSONANT`),
and syllable-modifying or Vedic signs (`POS_SMVD`).

In addition, several secondary positions are defined to handle various
reordering rules that deal with relative, rather than absolute,
positioning. `POS_AFTER_MAIN` means that a character must be
positioned immediately after the base consonant or syllable base. `POS_BEFORE_SUBJOINED`
and `POS_AFTER_SUBJOINED` mean that a character must be positioned
before or after any below-base consonants, respectively. Similarly,
`POS_BEFORE_POST` and `POS_AFTER_POST` mean that a character must be
positioned before or after any post-base consonants, respectively. 

For shaping-engine implementers, the names used for the ordering
positions matter only in that they are unambiguous. 

The description of the general shaping process that follows will note
when a character needs to be marked for reordering into some of these
positions. The specifics for each script provide additional details,
especially for ordering positions that are only used in that script.


### Script shaping characteristics ###

Indic scripts follow many of the same shaping patterns, but they
differ in a few critical characteristics that the shaping engine must
track. These include:

  - The rules that determine the base consonant in a syllable.
  
  - The final position of <samp>"Reph"</samp>.
  
  - How <samp>"Reph"</samp> is encoded or requested in a syllable.
	
  - Whether the below-base forms feature is applied only to consonants
    after the base consonant or syllable base, or to consonants before the base
    consonant and those after the base consonant or syllable base.
	
  - The ordering positions for dependent vowels
    (matras). Specifically, the ordering for left-side, right-side, 
    above-base, and below-base matras follows different rules. The
    rules employed vary between scripts, except for left-side matras,
    where all Indic scripts follow the same rule. 

In the lists that follow, the options for each characteristic are
mutually exclusive, and they are exhaustive for the set of Indic
scripts [listed](#general-information) at the beginning of this
document (Devanagari, Bengali, Gujarati, Gurmukhi, Kannada, Malayalam,
Oriya, Tamil, Telugu, and Sinhala).

Implementers who wish to cover additional scripts using the same
method would first need to determine whether any additional options
are relevant for each characteristic.

#### Base consonant ####

Locating the base consonant of a syllable generally requires parsing
the syllable to catch and exclude certain special-treatment consonants
(such as <samp>"Ra"</samp>s that will form <samp>"Reph"</samp>s or consonants that take on
below-base forms). However, each script has a general base-consonant
position that determines the appropriate search method. The base
consonant may be, generally:

  - The first consonant. This is designated `BASE_POS_FIRST`. This is
    the simplest base-consonant rule. After eliminating any initial
    <samp>"Repha"</samp>s from consideration, the first consonant is always the
    base consonant, without exception.
  
  - The last consonant, not counting any special forms. This is
    designated `BASE_POS_LAST`. This is the most complicated
    base-consonant rule, because the type and variety of special forms
    vary considerably between scripts. 
	
	The `BASE_POS_LAST` search algorithm (described in each script's
    shaping document) accounts for these special forms in every
    script. The abundance of special forms in certain scripts may
    routinely cause the search algorithm to identify a base consonant
    that is not logically last in the syllable. This is expected
    behavior.
	
	This base-consonant position is used in Devanagari, Bengali,
	Gujarati, Gurmukhi, Kannada, Malayalam, Oriya, Tamil, and Telugu.
  
  - The last consonant that is not preceded by a <samp>"ZWJ"</samp> (zero width
    joiner) character. 
	
	This position is only used in Sinhala, and is designated
    `BASE_POS_LAST_SINHALA`.

The scripts currently described in the "Indic" script group  and their
corresponding base-consonant rules are summarized in the following
table:

:::{table} Base-consonant rules by script

| Script     | Base-consonant rule    |
|:-----------|:-----------------------|
| Devanagari | `BASE_POS_LAST`        |
| Bengali    | `BASE_POS_LAST`        |
| Gujarati   | `BASE_POS_LAST`        |
| Gurmukhi   | `BASE_POS_LAST`        |
| Kannada    | `BASE_POS_LAST`        |
| Malayalam  | `BASE_POS_LAST`        |
| Oriya      | `BASE_POS_LAST`        |
| Tamil      | `BASE_POS_LAST`        |
| Telugu     | `BASE_POS_LAST`        |
| Sinhala    | `BASE_POS_LAST_SINHALA`|
:::


> Note: None of the specific scripts currently included in the "Indic"
> script group as it is enumerated in this document make use of the
> `BASE_POS_FIRST` base-consonant rule. However, the `BASE_POS_FIRST`
> rule is employed by several Brahmi-derived scripts also used in the
> region, including both [Myanmar](opentype-shaping-myanmar.md) and
> [Khmer](opentype-shaping-khmer.md). 
>
> Because these scripts share many other characteristics and
> conventions with the Indic group described by this document,
> `BASE_POS_FIRST` is included here for comparison. 

> Note: The `BASE_POS_LAST` search algorithm is used for Kannada and
> Telugu, although the unique properties of the Kannada and Telugu
> orthographies usually result in the search terminating at the first
> non-<samp>"Reph"</samp> consonant in a syllable. Namely, all consonants in
> Kannada and Telugu have a post-base form. 
>
> This is the expected behavior for Kannada and Telugu, and still
> differs from the `BASE_POS_FIRST` rule as used in the Brahmi-derived 
> scripts mentioned above. See those individual script pages for
> further detail.


#### Reph position ####

<samp>"Reph"</samp> may be positioned:

  - at the beginning of the syllable, in the ordering position
    `POS_RA_TO_BECOME_REPH`.
	
  - immediately before the first subjoined (below-base) consonant, in
    the ordering position `POS_BEFORE_SUBJOINED`.
	
  - immediately after the base consonant or syllable base, in the ordering position `POS_AFTER_MAIN`.
	
  - immediately after the last subjoined (below-base) consonant, in
    the ordering position `POS_AFTER_SUBJOINED`.

  - immediately before the first post-base consonant, in the ordering
    position `POS_BEFORE_POST`.
	
  - immediately after the last post-base consonant, in the ordering
    position `POS_AFTER_POST`.

The scripts currently described in the "Indic" script group  and their
corresponding Reph-position rules are summarized in the following
table:

:::{table} Reph-position rules by script

| Script     | Reph-position rule         |
|:-----------|:---------------------------|
| Devanagari | `REPH_POS_BEFORE_POST`     |
| Bengali    | `REPH_POS_AFTER_SUBJOINED` |
| Gujarati   | `REPH_POS_BEFORE_POST`     |
| Gurmukhi   | `REPH_POS_BEFORE_SUBJOINED`|
| Kannada    | `REPH_POS_AFTER_POST`      |
| Malayalam  | `REPH_POS_AFTER_MAIN`      |
| Oriya      | `REPH_POS_AFTER_MAIN`      |
| Tamil      | `REPH_POS_AFTER_POST`      |
| Telugu     | `REPH_POS_AFTER_POST`      |
| Sinhala    | `REPH_POS_AFTER_POST`      |
:::


#### Reph encoding ####

<samp>"Reph"</samp> may be:

  - requested explicitly, using the sequence <samp>"Ra,Halant,ZWJ"</samp>. This is
    designated `REPH_MODE_EXPLICIT`.
  
  - formed implicitly by the sequence <samp>"Ra,Halant"</samp> when used in certain positions
    in a syllable. This is designated `REPH_MODE_IMPLICIT`. Because a
    <samp>"Ra,Halant"</samp> does _not_ form a <samp>"Reph"</samp> in _every_ position in a
    syllable, script-specific tests are required.

  - encoded as a separate codepoint. This codepoint is generally
    called <samp>"Repha"</samp>, which distinguishes it from the <samp>"Reph"</samp>s formed by
    other sequences. A <samp>"Repha"</samp> may need reordering based on
    script-specific rules, in which case `REPH_MODE_LOGICAL_REPHA` is
    used. Alternatively, the script may not reorder <samp>"Repha"</samp>s at all,
    in which case `REPH_MODE_VISUAL_REPHA` is used.


The scripts currently described in the "Indic" script group  and their
corresponding Reph-encoding rules are summarized in the following
table:

:::{table} Reph-encoding rules by script

| Script     | Reph-encoding rule         |
|:-----------|:---------------------------|
| Devanagari | `REPH_MODE_IMPLICIT`       |
| Bengali    | `REPH_MODE_IMPLICIT`       |
| Gujarati   | `REPH_MODE_IMPLICIT`       |
| Gurmukhi   | `REPH_MODE_IMPLICIT`       |
| Kannada    | `REPH_MODE_IMPLICIT`       |
| Malayalam  | `REPH_MODE_LOGICAL_REPHA`  |
| Oriya      | `REPH_MODE_IMPLICIT`       |
| Tamil      | `REPH_MODE_IMPLICIT`       |
| Telugu     | `REPH_MODE_EXPLICIT`       |
| Sinhala    | `REPH_MODE_EXPLICIT`       |
:::


> Note: None of the specific scripts currently included in the "Indic"
> group as it is enumerated in this document make use of the
> `REPH_MODE_VISUAL_REPHA` encoding. However, `REPH_MODE_VISUAL_REPHA`
> is used in the [Khmer](opentype-shaping-khmer.md) script. 
>
> Because Khmer shares many other characteristics and
> conventions with the Indic group described by this document,
> `REPH_MODE_VISUAL_REPHA` is included here for comparison. 


#### Below-base forms ####

Below-base consonant forms (the `blwf` feature) may be applied:

  - Only to consonants after the base consonant or syllable base. This is designated
    `BLWF_MODE_POST_ONLY`.
	
  - To consonants occurring before or after the base consonant or syllable base. This is
    designated `BLWF_MODE_PRE_AND_POST`.


The scripts currently described in the "Indic" script group  and their
corresponding below-base–forms rules are summarized in the following
table:

:::{table} Below-base–forms rules by script

| Script     | Below-base–forms rule    |
|:-----------|:-------------------------|
| Devanagari | `BLWF_MODE_PRE_AND_POST` |
| Bengali    | `BLWF_MODE_PRE_AND_POST` |
| Gujarati   | `BLWF_MODE_PRE_AND_POST` |
| Gurmukhi   | `BLWF_MODE_PRE_AND_POST` |
| Kannada    | `BLWF_MODE_POST_ONLY`    |
| Malayalam  | `BLWF_MODE_PRE_AND_POST` |
| Oriya      | `BLWF_MODE_PRE_AND_POST` |
| Tamil      | `BLWF_MODE_PRE_AND_POST` |
| Telugu     | `BLWF_MODE_POST_ONLY`    |
| Sinhala    | `BLWF_MODE_PRE_AND_POST` |
:::


#### Left-side matras ####

All Indic scripts position left-side matras in the same
manner, in the ordering position `POS_PREBASE_MATRA`.

#### Right-side matras ####

Right-side matras may be positioned:

  - immediately before the first subjoined (below-base) consonant, in
    the ordering position `POS_BEFORE_SUBJOINED`.
	
  - immediately after the last subjoined (below-base) consonant, in
    the ordering position `POS_AFTER_SUBJOINED`.

  - immediately after the last post-base consonant, in the ordering
    position `POS_AFTER_POST`.


The scripts currently described in the "Indic" script group  and their
corresponding right-side–matra positions are summarized in the following
table:

:::{table} Right-side–matra positions by script

| Script     | Right-side–matra position |
|:-----------|:--------------------------|
| Devanagari | `POS_AFTER_SUBJOINED`     |
| Bengali    | `POS_AFTER_POST`          |
| Gujarati   | `POS_AFTER_POST`          |
| Gurmukhi   | `POS_AFTER_POST`          |
| Kannada    | _varies_                  |
| Malayalam  | `POS_AFTER_POST`          |
| Oriya      | `POS_AFTER_POST`          |
| Tamil      | `POS_AFTER_POST`          |
| Telugu     | _varies_                  |
| Sinhala    | `POS_AFTER_SUBJOINED`     |
:::


> Note: In most scripts, all right-side matras are positioned in the
> same sort-order position. The Kannada and Telugu scripts, however,
> feature more complex positioning rules for right-side matras, in
> which different right-side matras must be sorted into different
> positions. See the script-specific shaping documents for full
> details.


#### Above-base matras ####

Above-base matras may be positioned:

  - immediately before the first subjoined (below-base) consonant, in
    the ordering position `POS_BEFORE_SUBJOINED`.
	
  - immediately after the base consonant or syllable base, in the ordering position `POS_AFTER_MAIN`.
	
  - immediately after the last subjoined (below-base) consonant, in
    the ordering position `POS_AFTER_SUBJOINED`.

  - immediately after the last post-base consonant, in the ordering
    position `POS_AFTER_POST`.


The scripts currently described in the "Indic" script group  and their
corresponding above-base–matra positions are summarized in the following
table:

:::{table} Above-base–matra positions by script

| Script     | Above-base–matra position |
|:-----------|:--------------------------|
| Devanagari | `POS_AFTER_SUBJOINED`     |
| Bengali    | _null_                    |
| Gujarati   | `POS_AFTER_SUBJOINED`     |
| Gurmukhi   | `POS_AFTER_POST`          |
| Kannada    | `POS_BEFORE_SUBJOINED`    |
| Malayalam  | _null_                    |
| Oriya      | `POS_AFTER_MAIN`          |
| Tamil      | `POS_AFTER_SUBJOINED`     |
| Telugu     | `POS_BEFORE_SUBJOINED`    |
| Sinhala    | `POS_AFTER_SUBJOINED`     |
:::


#### Below-base matras ####

Below-base matras may be positioned:

  - immediately before the first subjoined (below-base) consonant, in
    the ordering position `POS_BEFORE_SUBJOINED`.
	
  - immediately after the last subjoined (below-base) consonant, in
    the ordering position `POS_AFTER_SUBJOINED`.

  - immediately after the last post-base consonant, in the ordering
    position `POS_AFTER_POST`.


The scripts currently described in the "Indic" script group  and their
corresponding below-base–matra positions are summarized in the following
table:

:::{table} Below-base–matra positions by script

| Script     | Below-base–matra position |
|:-----------|:--------------------------|
| Devanagari | `POS_AFTER_SUBJOINED`     |
| Bengali    | `POS_AFTER_SUBJOINED`     |
| Gujarati   | `POS_AFTER_POST`          |
| Gurmukhi   | `POS_AFTER_POST`          |
| Kannada    | `POS_BEFORE_SUBJOINED`    |
| Malayalam  | `POS_AFTER_POST`          |
| Oriya      | `POS_AFTER_SUBJOINED`     |
| Tamil      | `POS_AFTER_POST`          |
| Telugu     | `POS_BEFORE_SUBJOINED`    |
| Sinhala    | `POS_AFTER_SUBJOINED`     |
:::


### Stage 1: Identifying syllables and other sequences ###

A syllable in an Indic script consists of a valid orthographic sequence
that may be followed by a "tail" of modifier signs. 

The Nukta, Halant/Virama, and Anudatta marks can affect syllable
identification. All other signs are regarded as syllable modifier
signs, including those from the Vedic Extensions block.

Generally speaking, each syllable contains exactly one vowel
sound. Valid syllables may begin with either a consonant or an
independent vowel.

> Note: A consonant that is not accompanied by a dependent vowel (matra) sign
> carries the script's inherent vowel sound. This vowel sound is changed
> by a dependent vowel (matra) sign following the consonant.

Valid consonant-based syllables may include one or more additional 
consonants that precede the base consonant. Each of these
other, pre-base consonants will be followed by the <samp>"Halant"</samp> mark, which
indicates that they carry no vowel. They affect pronunciation by
combining with the base consonant (e.g., "_str_", "_pl_") but they
do not add a vowel sound.

Some Indic scripts also include special consonants that can occur after the
base consonant or syllable base. These post-base consonants and final consonants will
also be separated from the base consonant or syllable base by a <samp>"Halant"</samp> mark; the
algorithm for correctly identifying the base consonant includes a test
to recognize these sequences and not mis-identify the base consonant.

In Indic scripts, the consonant <samp>"Ra"</samp> receives special treatment; in
many circumstances it is replaced by one of two combining mark-like forms. 

  - A <samp>"Ra,Halant"</samp> or <samp>"Ra,Halant,ZWJ"</samp> sequence at the beginning of a
    syllable may be replaced with an above-base mark called <samp>"Reph"</samp>
    (although script-specific rules may negate this replacement if
    the <samp>"Ra"</samp> is the only consonant in the syllable). 

  - <samp>"Halant,Ra"</samp> sequences that occur elsewhere in the syllable may
    take on a below-base form (called <samp>"Rakaar"</samp> in Devanagari and most
    other scripts, and called <samp>"Raphala"</samp> in Bengali).

In addition, some scripts reorder post-base <samp>"Ra"</samp>s to a pre-base
position. These re-ordering <samp>"Ra"</samp>s may take on a different form, but
they are letter-like rather than mark-like forms.

<samp>"Reph"</samp>, <samp>"Rakaar"</samp>, <samp>"Raphala"</samp>, and reordering <samp>"Ra"</samp> characters must be
reordered after the syllable-identification stage is complete. 

> Note: Generally speaking, OpenType fonts will implement support for
> any below-base, post-base, and pre-base-reordering consonant forms
> by including the necessary substitution rules in their `blwf`,
> `pstf`, and `pref` lookups in <abbr title="Glyph Substitution table">GSUB</abbr>.
>
> Consequently, whenever shaping engines need to determine whether or 
> not a given consonant can take on such a special form, the most
> appropriate test is to check if the consonant is included in the
> relevant <abbr title="Glyph Substitution table">GSUB</abbr> lookup. Other implementations are possible, such as
> maintaining static tables of consonants, but checking for <abbr title="Glyph Substitution table">GSUB</abbr>
> support ensures that the expected behavior is implemented in the
> active font, and is therefore the most reliable approach.


In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.

> Note: Foreign loanwords, when written in Indic scripts, may
> not adhere to the syllable-formation rules described above. In
> particular, it is not uncommon to encounter foreign loanwords that
> contain a word-final suffix of consonants.
>
> Nevertheless, such word-final suffixes will be correctly matched by
> the regular expressions listed below. These loanwords are pronounced
> differently, which raises issues for potential readers, but the
> character sequences do not affect the shaping process.


Syllables should be identified by examining the run and matching
glyphs, based on their shaping class, using regular expressions. 

The following general-purpose regular expressions can be
used to match Indic syllables. 

The regular expressions utilize the shaping classes from the tables
above. For the purpose of syllable identification, more general
classes can be used, as defined in the following table. This
simplifies the resulting expressions. 

```markdown
_ra_		= The consonant "Ra" 
_consonant_	= ( `CONSONANT` | `CONSONANT_DEAD` ) - _ra_
_vowel_		= `VOWEL_INDEPENDENT`
_nukta_	  	= `NUKTA`
_halant_	= `VIRAMA`
_zwj_		= `JOINER`
_zwnj_		= `NON_JOINER`
_matra_		= `VOWEL_DEPENDENT` | `PURE_KILLER`
_syllablemodifier_	= `SYLLABLE_MODIFIER` | `BINDU` | `VISARGA` | `GEMINATION_MARK`
_vedicsign_	= `CANTILLATION`
_placeholder_	= `PLACEHOLDER` | `CONSONANT_PLACEHOLDER`| `NUMBER` 
_dottedcircle_	= `DOTTED_CIRCLE`
_repha_		= `CONSONANT_PRE_REPHA`
_consonantmedial_	= `CONSONANT_MEDIAL`
_symbol_	= `SYMBOL` | `AVAGRAHA`
_consonantwithstacker_	= `CONSONANT_WITH_STACKER`
_other_		= `OTHER` | `MODIFYING_LETTER`
```


> Note: the _ra_ identification class is mutually exclusive with 
> the _consonant_ class. The union of the _consonant_ and _ra_ classes
> is used in the regular expression elements below in order to
> correctly identify <samp>"Ra"</samp> characters that do not trigger <samp>"Reph"</samp> or
> <samp>"Rakaar"</samp> shaping behavior.
>
> Note, also, that the cantillation mark "combining Ra" in the
> Devanagari Extended block does _not_ belong to the _ra_
> identification class, and that the other "combining consonant"
> cantillation marks in the Devanagari Extended block do not belong to
> the _consonant_ identification class.

> Note: The _placeholder_ identification class includes codepoints
> that are often used in place of vowels or consonants when a document
> needs to display a matra, mark, or special form in isolation or
> in another context beyond a standard syllable. Examples of
> _placeholder_ codepoints include hyphens and non-breaking
> spaces. Sequences that utilize this approach should be identified as
> "standalone" syllables.
>
> The _placeholder_ identification class also includes numerals, which
> are commonly used as word substitutes within normal text. Examples
> include ordinals (e.g., "4th").

> Note: The _other_ identification class includes codepoints that
> do not interact with adjacent characters for shaping purposes. Even
> though some of these codepoints (such as `MODIFYING_LETTER`) can
> occur within words, they evoke no behavior from the shaping
> engine and do not factor into the regular expressions that
> follow. Therefore, the shaping engine may choose to ignore them
> during syllable identification; they are listed here for completeness.

These identification classes form the bases of the following regular
expression elements:

```markdown
C	= _consonant_ | _ra_
Z	= _zwj_ | _zwnj_
REPH	= (_ra_ _halant_) | _repha_
CN		= C _zwj_? _nukta_?
FORCED_RAKAR	= _zwj_ _halant_ _zwj_ _ra_
S	= _symbol_ _nukta_?
MATRA_GROUP	= Z{0,3} _matra_ _nukta_? (_halant_ | FORCED_RAKAR)?
SYLLABLE_TAIL	= (Z? _syllablemodifier_ _syllablemodifier_? _zwnj_?)? _vedicsign_{0,3}
HALANT_GROUP	= Z? _halant_ (_zwj_ _nukta_?)?
FINAL_HALANT_GROUP	= HALANT_GROUP | (_halant_ _zwnj_)
MEDIAL_GROUP	= _consonantmedial_?
HALANT_OR_MATRA_GROUP	= (FINAL_HALANT_GROUP | MATRA_GROUP*)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(MATRA_GROUP)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(MATRA_GROUP){0,4}` .


Using the above elements, the following regular expressions define the
possible syllable types:

A consonant-based syllable will match the expression:
```markdown
(_repha_|_consonantwithstacker_)? (CN HALANT_GROUP)* CN MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(CN HALANT_GROUP)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(CN HALANT_GROUP){0,4}` .

A vowel-based syllable will match the expression:
```markdown
REPH? _vowel_ _nukta_? (_zwj_ | (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

A standalone syllable will match the expression:
```markdown
((_repha_|_consonantwithstacker_)? _placeholder_ | REPH? _dottedcircle_) _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

> Note: Although they are labeled as "standalone syllables" here,
> many sequences that match the standalone regular expression above
> are instances where a document needs to display a matra, combining
> mark, or special form in isolation. Such sequences might not have
> any significance with regard to the definition of syllables used in
> the language or orthography of the text.

A symbol-based syllable will match the expression:
```markdown
S SYLLABLE_TAIL
```

A broken syllable will match the expression:
```markdown
REPH? _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .


The primary problem involved in shaping broken syllables is the lack
of a syllable base (either a base consonant or an independent
vowel). Without a syllable base, the shaping engine cannot perform
<abbr title="Glyph Positioning table">GPOS</abbr> positioning and other contextual operations that are required
later in the shaping process.

To make up for this limitation, shaping engines should insert a
dotted-circle placeholder (`U+25CC`) character into the text stream
where the missing syllable base was expected to occur. This
placeholder allows the shaping process to proceed on a best-effort
basis at handling the broken-syllable sequence, but making guarantees
about the orthographic correctness or preferred appearance of the
final result is out of scope for this document.

Shaping engines can perform this dotted-circle insertion at any point
after the broken syllable has been recognized and before <abbr title="Glyph Substitution table">GSUB</abbr> features
are applied. However, the best results will likely be attained by
performing the insertion immediately, before proceeding to
stage 2. This will enable the maximum number of <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features
in the active font to be correctly applied to the text run by ensuring
that all reordering, tagging, and sorting algorithms are executed as
usual.

> Note: In software stacks where other text-handling operations, such
> as Unicode normalization and localization, are performed before the
> text run is passed to the shaping engine, there is a potential for
> the dotted-circle insertion to cause unexpected effects.
>
> For example, if a `ccmp` or `locl` feature substitutes the default
> dotted-circle placeholder glyph with a variant glyph of a different
> size or weight for the (`U+25CC`) codepoint, then any shaping engine
> which relies on another software component to handle that
> functionality must take additional care to ensure consistency.


The expressions above use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


> Note: "standalone" syllables can be used to display examples of
> letters, marks, and other characters without requiring full
> syllables or words.

After the syllables have been identified, each of the subsequent 
shaping stages occurs on a per-syllable basis.


### Stage 2: Initial reordering ###

The initial reordering stage is used to relocate glyphs from the
phonetic order in which they occur in a run of text to the
orthographic order in which they are presented visually.

This may mean moving dependent-vowel (matra) glyphs, <samp>"Ra,Halant"</samp>
sequences, and other consonants that take special 
treatment in some circumstances.

These reordering moves are mandatory. The final-reordering stage
may make additional moves, depending on the text and on the features
implemented in the active font.

The syllable should be processed by tagging each glyph with its
intended position based on its ordering category. After all glyphs
have been tagged, the entire syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.

The final sort order of the ordering categories should be:


	POS_RA_TO_BECOME_REPH
	POS_PREBASE_MATRA
	POS_PREBASE_CONSONANT

	POS_SYLLABLE_BASE
	POS_AFTER_MAIN

	POS_ABOVEBASE_CONSONANT

	POS_BEFORE_SUBJOINED
	POS_BELOWBASE_CONSONANT
	POS_AFTER_SUBJOINED

	POS_BEFORE_POST
	POS_POSTBASE_CONSONANT
	POS_AFTER_POST

	POS_FINAL_CONSONANT
	POS_SMVD


This sort order enumerates all of the possible final positions to
which a codepoint might be reordered, across all of the Indic
scripts. Not every position will be utilized in every script.

Additional information about the ordering positions is available in
the [sort ordering](#sort-ordering) section of this document.

#### Stage 2, step 1: Base consonant ####

The first step is to determine the base consonant of the syllable, if
there is one, and tag it as `POS_SYLLABLE_BASE`.

The algorithm used to find the base consonant varies according to the
base-consonant shaping characteristic of the script.

For `BASE_POS_FIRST` scripts, the first consonant of the syllable is
the base consonant.

> Note: None of the specific scripts currently included in the "Indic"
> group as it is enumerated in this document make use of the
> `BASE_POS_FIRST` base-consonant rule. However, the `BASE_POS_FIRST`
> rule is employed by several Brahmi-derived scripts also used in the
> region, including both [Myanmar](opentype-shaping-myanmar.md) and
> [Khmer](opentype-shaping-khmer.md). 
>
> Because these scripts share many other characteristics and
> conventions with the Indic group described by this document,
> `BASE_POS_FIRST` is included here for comparison. 


For `BASE_POS_LAST` scripts, the base consonant is the last consonant
in the syllable, excluding all consonants that will take on special
post-base, final, or below-base forms, and excluding all pre-base
reordering <samp>"Ra"</samp>s. For a detailed explanation of the search algorithm
employed, see the page for each specific script.

For Sinhala, which uses `BASE_POS_LAST_SINHALA`, the base consonant is
the last consonant that is not preceded by a zero-width joiner
(<samp>"ZWJ"</samp>).

While performing the base-consonant search, shaping engines may
also encounter special-form consonants, including below-base
consonants and post-base consonants. Each of these special-form
consonants must also be tagged (`POS_BELOWBASE_CONSONANT`,
`POS_POSTBASE_CONSONANT`, respectively). 

Any pre-base-reordering consonant (such as a pre-base-reordering <samp>"Ra"</samp>)
encountered during the base-consonant search must be tagged
`POS_POSTBASE_CONSONANT`. 
 
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.


#### Stage 2, step 2: Matra decomposition ####

Second, any two-part or three-part dependent vowels (matras) must be decomposed
into their component parts.

Because this decomposition is a character-level operation, the shaping
engine may choose to perform it earlier, such as during an initial
Unicode-normalization stage. However, all such decompositions must be
completed before the shaping engine begins step three, below.


#### Stage 2, step 3: Tag decomposed matras ####

Third, all dependent-vowel (matra) signs must be tagged with
their final position. 

Single-part matras can be tagged with the appropriate sort-ordering
position based on the ordering position of the script's specific
script-shaping characteristics. 

In most cases, all matras of the same Mark-positioning subclass (such
as `LEFT_POSITION`) in a particular script are tagged with the same
final position (such as `POS_PREBASE_MATRA`). 

Some scripts, however, include matras that must be tagged according to
more involved rule sets. In the set of Indic scripts described here,
this includes [Kannada](opentype-shaping-kannada.md) and
[Telugu](opentype-shaping-telugu.md). See the individual
script-shaping document of each script to find a complete description
of the applicable matra-tagging rules.

> Note: The shaping engine may, as an alternative, choose to perform
> this tagging earlier, such as during an initial Unicode-normalization
> stage. 
>
> Matras that resulted from the preceding decomposition step, however,
> may not have been tagged when they were decomposed. If not, they must
> be tagged for reordering before proceeding to the next step.


#### Stage 2, step 4: Adjacent marks ####

Fourth, any subsequences of marks that include a <samp>"Nukta"</samp> and a
<samp>"Halant"</samp> or Vedic sign must be reordered so that the <samp>"Nukta"</samp> appears
first.

This means that the subsequence <samp>"Halant,Nukta"</samp> is reordered to
<samp>"Nukta,Halant"</samp> and that the subsequence <samp>"_Vedic_sign_,Nukta"</samp> is
reordered to <samp>"Nukta,_Vedic_sign_"</samp>.

For subsequences of affected marks that are longer than two, the
reordering operation must be repeated until the <samp>"Nukta"</samp> is the first
character in the subsequence. No other marks in the subsequence
should be reordered.

This order is canonical in Unicode and is required so that
<samp>"_consonant_,Nukta"</samp> substitution rules from <abbr title="Glyph Substitution table">GSUB</abbr> will be correctly
matched later in the shaping process.

#### Stage 2, step 5: Pre-base consonants ####

Fifth, consonants that occur before the syllable base must be
tagged. Excluding initial <samp>"Ra,Halant"</samp> sequences that will become
<samp>"Reph"</samp>s, each such consonant is tagged as follows:

  - If the consonant has a below-base form, tag it as
          `POS_BELOWBASE_CONSONANT`. 
  - Otherwise, tag it as `POS_PREBASE_CONSONANT`.
  
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.

#### Stage 2, step 6: Reph ####

Sixth, initial <samp>"Ra,Halant"</samp> (in `REPH_MODE_IMPLICIT` scripts) or
<samp>"Ra,Halant,ZWJ"</samp> (in `REPH_MODE_EXPLICIT` scripts) sequences that will
become <samp>"Reph"</samp>s must be tagged with `POS_RA_TO_BECOME_REPH`.

#### Stage 2, step 7: Post-base consonants ####

Seventh, any non-base consonants that occur after a dependent vowel
(matra) sign must be tagged with `POS_POSTBASE_CONSONANT`. Such
consonants will either be followed by a <samp>"Halant"</samp> glyph or will be in
the `CONSONANT_DEAD` shaping class. 
	
  <!--- Double check: should this be <samp>"_Consonant_,Halant"</samp> instead of
        <samp>"Halant,_Consonant_"</samp>? --->
	
#### Stage 2, step 8: Mark tagging ####

Eighth, all marks must be tagged. 

> Note: In this step, joiner and non-joiner characters must also be
> tagged according to the same rules given for marks, even though
> these characters are not categorized as marks in Unicode.

Marks in the `BINDU`, `VISARGA`, `AVAGRAHA`, `CANTILLATION`,
`SYLLABLE_MODIFIER`, `GEMINATION_MARK`, and `SYMBOL` categories should
be tagged with `POS_SMVD`. 

All <samp>"Nukta"</samp>s must be tagged with the same positioning tag as the
preceding consonant, independent vowel, placeholder, or dotted circle.

All remaining marks (not in the `POS_SMVD` category and not <samp>"Nukta"</samp>s)
must be tagged with the same positioning tag as the closest non-mark
character the mark has affinity with, so that they move together 
during the sorting step.

There are two possible cases: those marks before the syllable base
and those marks after the syllable base. In addition, an exception is
made for <samp>"Halant"</samp> marks that follow a left-side (pre-base) matra.

  1. Initially, all remaining marks should be tagged with the same
	 positioning tag as the closest preceding consonant.

  2. For each consonant after the syllable base (such as post-base
	 consonants, below-base consonants, or final consonants), all
	 remaining marks located between that current consonant and any
	 previous consonant should be tagged with the same positioning tag as
	 the current (later) consonant.
  
     In other words, all consonants preceding the syllable base "own" the
	 marks that follow them, while all consonants after the syllable base
	 "own" the marks that come before them. When a syllable does not have
	 any consonants after the syllable base, the syllable base should
	 "own" all the marks that follow it.
  
  3. Finally, <samp>"Halant"</samp> marks that follow a left-side dependent vowel
     (matra) should _not_ be tagged with the left-side matra's
     positioning tag. Instead, the <samp>"Halant"</samp> should be tagged with the
     positioning tag of the non-mark character preceding the left-side
     matra. This prevents the <samp>"Halant"</samp> mark from being moved with the
     left-side matra when the syllable is sorted.


<!--- HarfBuzz also tags everything between a post-base consonant or -->
<!--matra and another post-base consonant as belonging to the latter -->
<!--post-base consonant. --->


#### Stage 2, step 9: Sort syllable ####

With these steps completed, the syllable can be sorted into the final
sort order as listed at the beginning of stage 2.

The glyphs in the syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.


#### Stage 2, step 10: Flag sequences for possible feature applications ####

With the initial reordering complete, those glyphs in the syllable that
may have <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features applied in stages 3, 5, and 6 should be
flagged for each potential feature. 

This flagging is preliminary; the set of potential features varies
between different scripts and which features are supported varies
between fonts. It is also possible that the application of
one feature on a glyph sequence will perform a substitution that makes
a later feature no longer applicable to the updated sequence.

Consequently, the flagging must be completed before shaping proceeds
to the stages during which features are applied.

Some shaping features, such as `locl`, can potentially apply to any
glyphs. Therefore it is not necessary to maintain a separate flag for
these features in the bitmask (or other data structure) used to track
the flags -- although shaping engines may do so if desired.

The sequences to flag are summarized in the list below; a full
description of each feature's function and interpretation is provided
in the <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> application stages that follow.

  - `nukt` should match <samp>"_Consonant_,Nukta"</samp> sequences
  - `akhn` should match <samp>"Ka,Halant,Ssa"</samp> and <samp>"Ja,Halant,Nya"</samp>
  - `rphf` should match initial <samp>"Ra,Halant"</samp> sequences but _not_ match
            initial <samp>"Ra,Halant,ZWJ"</samp> sequences
  - `blwf` should match <samp>"Halant,Ra"</samp>, <samp>"Halant,Ha"</samp>, and <samp>"Halant,Va"</samp> in
            post-base positions and <samp>"Ra,Halant"</samp>, <samp>"Ha,Halant"</samp>, and
            <samp>"Va,Halant"</samp> in non-initial pre-base positions
  - `half` should match <samp>"_Consonant_,Halant"</samp> in pre-base position but
           _not_ match <samp>"Ra,Halant"</samp> sequences flagged for `rphf` and
           _not_ match <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequences
  - `pstf` should match initial <samp>"Halant,Ya"</samp> in post-base position
  - `vatu` should match <samp>"_Consonant_,Halant,Ra"</samp>,
           <samp>"_Consonant_,Halant,Ha"</samp>, and <samp>"_Consonant_,Halant,Va"</samp>
  - `cjct` should match <samp>"_Consonant_,Halant,_Consonant_"</samp> but _not_
            match <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
            <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp>


### Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should be flagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2, step 10.

The order in which these substitutions must be performed is fixed for
all Indic scripts:

	locl
	nukt
	akhn
	rphf 
	rkrf 
	pref 
	blwf 
	abvf 
	half
	pstf
	vatu
	cjct
	cfar


Not every feature is used in every script. See the individual script
pages for further script-specific information.


> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.


### Stage 4: Final reordering ###

The final reordering stage repositions marks, dependent-vowel (matra)
signs, and <samp>"Reph"</samp> glyphs to the appropriate location with respect to
the base consonant or syllable base. Because multiple substitutions
may have occurred during the application of the basic-shaping features
in the preceding stage, these repositioning moves could not be
performed during the initial reordering stage.

Like the initial reordering stage, the steps involved in this stage
occur on a per-syllable basis.

<!--- Check that classifications have not been mangled. If the -->
<!--character is a Halant AND a ligature was formed AND a multiple
substitution was performed, restore the classification to VIRAMA
because it was almost certainly lost in the preceding <abbr title="Glyph Substitution table">GSUB</abbr> stage.
--->

#### Stage 4, step 1: Base consonant ####

The final reordering stage, like the initial reordering stage, begins
with determining the syllable base of each syllable, following the
same algorithm used in stage 2, step 1.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base. In a standalone sequence or
other syllable that begins with a placeholder or a dotted circle, the
placeholder or dotted circle will always serve as the syllable base.

In a syllable that begins with a consonant, the shaping engine must
repeat the base-consonant search algorithm used in stage 2, step 1.

The codepoint of the underlying base consonant or syllable base will
not change between the search performed in stage 2, step 1, and the
search repeated here. However, the application of <abbr title="Glyph Substitution table">GSUB</abbr> shaping
features in stage 3 means that several ligation and many-to-one
substitutions may have taken place. The final glyph produced by that
process may, therefore, be a conjunct or ligature form — in most
cases, such a glyph will not have an assigned Unicode codepoint.
   
#### Stage 4, step 2: Pre-base matras ####

Pre-base dependent vowels (matras) that were reordered during the
initial reordering stage must be moved to their final position. This
position is defined as:
   
   - after the last standalone <samp>"Halant"</samp> glyph that comes after the
     matra's starting position and also comes before the main
     consonant.
   - If a zero-width joiner follows this last standalone <samp>"Halant"</samp>, the
     final matra position is moved to after the joiner.

This means that the matra will move to the right of all explicit
<samp>"_Consonant_,Halant"</samp> subsequences, but will stop to the left of the base
consonant or syllable base, all conjuncts or ligatures that contain
the base consonant or syllable base, and all half forms.

> Note: OpenType and Unicode both state that if the syllable includes
> a <abbr title="Zero-Width Joiner">ZWJ</abbr> immediately after the last <samp>"Halant"</samp>, then the final matra
> position should be after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.
>
> However, there are several test sequences indicating that
> Microsoft's Uniscribe shaping engine did not follow this rule (in,
> at least, Devanagari and Bengali text), and in these circumstances
> Uniscribe instead makes the final matra position before the final
> <samp>"Consonant,Halant,ZWJ"</samp>.
>
> Subsequently, the HarfBuzz shaping engine has also followed the same
> pattern. If other shaping engine implementations prefer to maintain
> maximum compatibility with Uniscribe and HarfBuzz, then they should
> also follow suit.

> Note: The Microsoft script-development specifications for OpenType
> shaping also state that if a zero-width non-joiner follows the last
> standalone <samp>"Halant"</samp>, the final matra position is moved to after the
> non-joiner. However, it is unnecessary to test for this condition,
> because a <samp>"Halant,ZWNJ"</samp> subsequence is, by definition, the end of a
> syllable. Consequently, a <samp>"Halant,ZWNJ"</samp> cannot be followed by a
> pre-base dependent vowel.


#### Stage 4, step 3: Reph ####

<samp>"Reph"</samp> must be moved from the beginning of the syllable to its final
position. The correct final position depends on the script's
Reph-position shaping characteristic, and is conditional upon the
presence or absence of certain characters (such as post-base
consonants or <samp>"matra,Halant"</samp> sequences) in the syllable. 

The full algorithm for determining the final Reph position has seven steps.

(a) If the script uses Reph-position rule `REPH_POS_AFTER_POST`, jump
immediately to step (e). Otherwise, proceed to step (b).

(b) Find the first explicit <samp>"Halant"</samp> between the syllable base
consonant and the first post-Reph consonant. If there is a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>
following this <samp>"Halant"</samp>, move the <samp>"Reph"</samp> to a position immediately
after the <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>, then proceed to step (mH). Otherwise, move the
<samp>"Reph"</samp> to a position immediately after the <samp>"Halant"</samp>, then proceed to
step (mH). If no such explicit <samp>"Halant"</samp> is found, proceed to step
(c).

(c) If the script uses Reph-position rule `REPH_POS_AFTER_MAIN`, find
the first consonant not ligated with the syllable base, and that is
not a potential pre-base reordering <samp>"Ra"</samp>. If such a consonant is
found, move the <samp>"Reph"</samp> to a position immediately after the
consonant, then proceed to step (mH). If no such consonant is found,
proceed to step (d). If the script uses a different Reph-position
rule, proceed to step (d).

(d) If the script uses Reph-position rule `REPH_POS_BEFORE_POST`, find
the first post-base consonant not ligated with the syllable base. If
such a consonant is found, move the <samp>"Reph"</samp> to a position immediately
before the consonant, then proceed to step (mH). If no such consonant
is found, proceed to step (e). If the script uses a different
Reph-position rule, proceed to step (e).

(e) Move the <samp>"Reph"</samp> to a position immediately before the first
post-base matra, syllable modifier sign or Vedic sign that has a
reordering class after the intended Reph position in the syllable sort
order (as listed in [stage 2](#stage-2-initial-reordering)), then
proceed to step (mH). If no such matra or sign is found, proceed to
step (f).

(f) Move the <samp>"Reph"</samp> to the end of the syllable. 

(mH) Finally, if the <samp>"Reph"</samp> position arrived at in the preceding steps
is immediately after a <samp>"matra,Halant"</samp> sequence, move the <samp>"Reph"</samp> so
that it is before the <samp>"Halant"</samp>. 


Taking the Reph-position–rule conditionals in the above algorithm into
account, the position-finding steps that may be executed in each
script are summarized in the following table:

:::{table} Summary of final–reph-positioning rules by script

| Script     | Reph-position rule        | a | b | c | d | e | f | mH |
|:-----------|:--------------------------|:--|:--|:--|:--|:--|:--|:---|
| Devanagari |`REPH_POS_BEFORE_POST`     |   | • |   | • | • | • | •  |
| Bengali    |`REPH_POS_AFTER_SUBJOINED` |   | • |   |   |   | • | •  |
| Gujarati   |`REPH_POS_BEFORE_POST`     |   | • |   | • | • | • | •  |
| Gurmukhi   |`REPH_POS_BEFORE_SUBJOINED`|   | • |   |   |   | • | •  |
| Kannada    |`REPH_POS_AFTER_POST`      |   |   |   |   | • | • | •  |
| Malayalam  |`REPH_POS_AFTER_MAIN`      |   | • | • |   | • | • | •  |
| Oriya      |`REPH_POS_AFTER_MAIN`      |   | • | • |   | • | • | •  |
| Tamil      |`REPH_POS_AFTER_POST`      |   |   |   |   | • | • | •  |
| Telugu     |`REPH_POS_AFTER_POST`      |   |   |   |   | • | • | •  |
| Sinhala    |`REPH_POS_AFTER_MAIN`      |   | • | • |   | • | • | •  |
:::


#### Stage 4, step 4: Pre-base reordering consonants ####

Any pre-base-reordering consonants must be moved to immediately before
the base consonant or syllable base.
  

#### Stage 4, step 5: Initial matras ####

Any left-side dependent vowels (matras) that are at the start of a
word must be flagged for potential substitution by the `init` feature
of <abbr title="Glyph Substitution table">GSUB</abbr>.

> Note: The `init` feature for word-initial dependent vowels (matras)
> is defined only for Bengali and should not be expected in fonts for
> any other scripts. Therefore, this step will involve no work when
> processing non-`<bng2>` text. 


### Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr> ###

In this stage, the remaining substitution features from the <abbr title="Glyph Substitution table">GSUB</abbr> table
are applied. In preparation for this stage, glyph sequences should be
flagged for possible application of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2,
step 10.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	init
	pres
	abvs
	blws
	psts
	haln

### Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied.

As with the preceding stage, the order in which these features are
applied is not canonical; they should be applied in the order in which
they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

        dist
        abvm
        blwm

## The old Indic shaping model ##


The older Indic script tags (`<deva>`, `<beng>`, `<gujr>`, `<guru>`, `<knda>`,
`<mlym>`, `<orya>`, `<taml>`, and `<telu>`) have been deprecated. However,
shaping engines may still encounter fonts that were built to work with
these tags and some users may still have documents that were written to
take advantage of the original shaping rules.

### Distinctions from the Indic2 model ###

The most significant distinction between the shaping models is that the
sequence of <samp>"Halant"</samp> and consonant glyphs (used to trigger shaping
features) was altered when migrating from the old to the new shaping model. 

Specifically, shaping engines were expected to reorder post-base
<samp>"Halant,_Consonant_"</samp> sequences to <samp>"_Consonant_,Halant"</samp>.

As a result, a font's <abbr title="Glyph Substitution table">GSUB</abbr> substitutions would be written to match
<samp>"_Consonant_,Halant"</samp> sequences in all pre-base and post-base positions.


The old-model Indic syllable

	Pre-baseC Halant BaseC Halant Post-baseC

would be reordered to

	Pre-baseC Halant BaseC Post-baseC Halant

before features are applied.

In Indic2 text, as described above in this document, there is no
such reordering. The correct sequence to match for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions is
<samp>"_Consonant_,Halant"</samp> for pre-base consonants, but <samp>"Halant,_Consonant_"</samp>
for post-base consonants.

The old Indic shaping model also did not recognize the
`BLWF_MODE_PRE_AND_POST` shaping characteristic. Instead, all scripts
were treated as if they followed the `BLWF_MODE_POST_ONLY`
characteristic. In other words, below-base form substitutions were
only applied to consonants after the base consonant or syllable base.

In addition, left-side dependent vowel marks
(matras) were not repositioned during the final reordering
stage. For `<deva>`, `<beng>`, `<gujr>`, `<guru>`, `<knda>`,
`<orya>`, and `<telu>` text, the left-side matra was always positioned
at the beginning of the syllable. For `<mlym>` and `<taml>` text, the
left-side matra was positioned immediately before the base consonant or syllable base.


### Advice for handling fonts with old Indic features only ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences in order to apply <abbr title="Glyph Substitution table">GSUB</abbr> substitutions when it is known that
the font in use supports only the old shaping model.

### Advice for handling text runs composed in the old Indic format ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions or to reorder them to
<samp>"Halant,_Consonant_"</samp> when processing text runs that are tagged with
one of the old Indic script tags and it is known that the font in use supports
only the Indic2 shaping model.

Shaping engines may also choose to apply `blwf` substitutions to
below-base consonants occurring before the base consonant when it is
known that the font in use supports an applicable substitution lookup.

Shaping engines may also choose to position left-side matras according
to the old-model Indic ordering scheme; however, doing so might interfere
with matching <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features.


================================================
FILE: opentype-shaping-kannada.md
================================================
```{include} /_global.md
```

# Kannada shaping in OpenType #

This document details the shaping procedure needed to display text
runs in the Kannada script.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes and subclasses](#shaping-classes-and-subclasses)
      - [Kannada character tables](#kannada-character-tables)
  - [The `<knd2>` shaping model](#the-knd2-shaping-model)
      - [Stage 1: Identifying syllables and other sequences](#stage-1-identifying-syllables-and-other-sequences)
      - [Stage 2: Initial reordering](#stage-2-initial-reordering)
      - [Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr>](#stage-3-applying-the-basic-substitution-features-from-gsub)
      - [Stage 4: Final reordering](#stage-4-final-reordering)
      - [Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr>](#stage-5-applying-all-remaining-substitution-features-from-gsub)
      - [Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr>](#stage-6-applying-remaining-positioning-features-from-gpos)
  - [The `<knda>` shaping model](#the-knda-shaping-model)
      - [Distinctions from `<knd2>`](#distinctions-from-knd2)
      - [Advice for handling fonts with `<knda>` features only](#advice-for-handling-fonts-with-knda-features-only)
      - [Advice for handling text runs composed in `<knda>` format](#advice-for-handling-text-runs-composed-in-knda-format)


## General information ##

The Kannada script belongs to the Indic family, and follows
the same general patterns as the other Indic scripts. More
specifically, it belongs to the South Indic subgroup, in which
sequences of adjacent consonants are often represented as below-base forms.

The Kannada script is used to write multiple languages, most commonly
Kannada, plus several minority languages. In addition, Sanskrit may be written
in Kannada, so Kannada script runs may include glyphs from the Vedic
Extensions block of Unicode. 

There are two extant Kannada script tags defined in OpenType, `<knda>`
and `<knd2>`. The older script tag, `<knda>`, was deprecated in 2005.
Therefore, new fonts should be engineered to work with the `<knd2>`
shaping model. However, if a font is encountered that supports only
`<knda>`, the shaping engine should deal with it gracefully.

## Terminology ##

OpenType shaping uses a standard set of terms for Indic scripts.  The
terms used colloquially in any particular language may vary, however,
potentially causing confusion.

**Matra** is the standard term for a dependent vowel sign. In the Kannada
language, dependent-vowel signs may also be referred to as _swara_ forms.

The term "matra" is also used to refer to the headline in other Indic
scripts, and may be used to describe the distinctive cap stroke above most
Kannada letters by comparison. To avoid ambiguity, the term **headline** is
used in most Unicode and OpenType shaping documents.

**Halant** and **Virama** are both standard terms for the above-base "vowel-killer"
sign. Unicode documents use the term "virama" most frequently, while
OpenType documents use the term "halant" most frequently. In the Kannada
language, this sign is known as the _hrasva_.

**Chandrabindu** (or simply **Bindu**) is the standard term for the diacritical mark
indicating that the preceding vowel should be nasalized. In the Kannada
language, this mark is known as the _candrabindu_.

The term **base consonant** is also critical to Indic shaping. The
base consonant of a syllable is the consonant that carries the
syllable's vowel sound, either the inherent vowel (for an unmarked
base consonant) or a dependent vowel (with the addition of a matra).

A syllable's base consonant is generally rendered in its full form
(although it may form ligatures), while other consonants in the
syllable frequently take on secondary forms. Different <abbr title="Glyph Substitution table">GSUB</abbr>
substitutions may apply to a script's **pre-base** and **post-base**
consonants. Some of these substitutions create **above-base** or
**below-base** forms. The **Reph** form of the consonant "Ra" is an
example.

Syllables may also begin with an **independent vowel** instead of a
consonant. In these syllables, the independent vowel is rendered in
full-letter form, not as a matra, and the independent vowel serves as the
syllable base, similar to a base consonant.

Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.

## Glyph classification ##

Shaping Kannada text depends on the shaping engine correctly
classifying each glyph in the run. As with most other scripts, the
classifications must distinguish between consonants, vowels
(independent and dependent), numerals, punctuation, and various types
of diacritical mark.

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not sufficient to fully capture the
expected shaping behavior (such as glyph reordering). Therefore,
Kannada glyphs must additionally be classified by how they are treated
when shaping a run of text.

### Shaping classes and subclasses ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Indic scripts. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Several of the diacritic and syllable-modifying marks behave according
to their own rules and, thus, have a special class. These include
`BINDU`, `VISARGA`, `AVAGRAHA`, `NUKTA`, and `VIRAMA`. Some
less-common marks behave according to rules that are similar to these
common marks, and are therefore classified with the corresponding
common mark. The Vedic Extensions also include a `CANTILLATION`
class for tone marks.

Letters generally fall into the classes `CONSONANT`,
`VOWEL_INDEPENDENT`, and `VOWEL_DEPENDENT`. These classes help the
shaping engine parse and identify key positions in a syllable. For
example, Unicode categorizes dependent vowels as `Mark [Mn]`, but the
shaping engine must be able to distinguish between dependent vowels
and diacritical marks (some of which are also categorized as `Mark [Mn]`).

Kannada uses one subclass of consonant, `CONSONANT_WITH_STACKER`. This
subclass supports two consonants, <samp>"Jihvamuliya"</samp> (`U+0CF1`) and
<samp>"Upadhmaniya"</samp> (`U+0CF2`), that are used only for Sanskrit text
runs. These consonants may form stacked ligatures with subsequent
consonants without an intervening <samp>"Halant"</samp>. Such ligature formation,
if desired, must be implemented in the font.

The letters classified as `CONSONANT_WITH_STACKER` should be treated
as consonants when [identifying
syllables](#stage-1-identifying-syllables-and-other-sequences). No
additional behavior is required.


Other characters, such as symbols and miscellaneous letters (for
example, letter-like symbols that only occur as standalone entities
and do not occur within syllables), need no special attention from the
shaping engine, so they are not assigned a shaping class.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType features that
might affect how the respective glyphs are drawn, such as `tnum`,
which specifies the usage of tabular-width numerals, and `sups`, which
replaces the default glyphs with superscript variants.

Marks and dependent vowels are further labeled with a mark-placement
subclass, which indicates where the glyph will be placed with respect
to the base character to which it is attached. The actual position of
the glyphs is determined by the lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr>
table; however, the shaping rules for Indic scripts require that the
shaping engine be able to identify marks by their general
position. 

For example, left-side dependent vowels (matras), classified
with `LEFT_POSITION`, must frequently be reordered, with the final
position determined by whether or not other letters in the syllable
have formed ligatures or combined into conjunct forms. Therefore, the
`LEFT_POSITION` subclass of the character must be tracked throughout
the shaping process.

There are four basic _mark-placement subclasses_ for dependent vowels
(matras). Each corresponds to the visual position of the matra with
respect to the syllable base to which it is attached:

  - `LEFT_POSITION` matras are positioned to the left of the syllable base.
  - `RIGHT_POSITION` matras are positioned to the right of the syllable base.
  - `TOP_POSITION` matras are positioned above the syllable base.
  - `BOTTOM_POSITION` matras are positioned below the syllable base.
  
These positions may also be referred to elsewhere in shaping documents as:

  - _Pre-base_ matras
  - _Post-base_ matras
  - _Above-base_ matras
  - _Below-base_ matras
  
respectively. The `LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

In addition, dependent-vowel codepoints that are composed of multiple
components will be designated in character tables as having a compound
_mark-placement subclass_, such as `TOP_AND_RIGHT` or `LEFT_AND_RIGHT`. 

However, these multi-part matras are decomposed into separate matra
components during the shaping process. After the decomposition, each
matra component will belong to exactly one of the four basic
_mark-placement subclasses_.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there are some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 

### Kannada character tables ###

Separate character tables are provided for the Kannada and Vedic
Extensions blocks as well as for other miscellaneous characters that
are used in `<knd2>` text runs:

  - [Kannada character table](character-tables/character-tables-kannada.md#kannada-character-table)
  - [Vedic Extensions character table](character-tables/character-tables-kannada.md#vedic-extensions-character-table)
  - [Miscellaneous character table](character-tables/character-tables-kannada.md#miscellaneous-character-table)

The tables list each codepoint along with its Unicode general
category, its shaping class, and its mark-placement subclass. The
codepoint's Unicode name and an example glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0C81`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0C81; Candrabindu         |
| | | | |
|`U+0C95`   | Letter           | CONSONANT         | _null_                     | &#x0C95; Ka                  |
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine.

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the tables use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of Kannada text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (such as <samp>"Reph"</samp>)


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a <samp>"_Consonant_,Halant,_Consonant_"</samp> sequence. 

  - The sequence <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> blocks the
    formation of a conjunct between the two consonants. 

Note, however, that the <samp>"_Consonant_,Halant"</samp> subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead. 

  - The sequence <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> should produce
    the first consonant in its standard form, followed by an explicit
    <samp>"Halant"</samp>.

A secondary usage of the zero-width joiner is to prevent the formation of
<samp>"Reph"</samp>. 

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence should not produce a <samp>"Reph"</samp>,
    even where an initial <samp>"Ra,Halant"</samp> sequence without the zero-width
    joiner would otherwise produce a <samp>"Reph"</samp>.

The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.

For example:

  - A lookup that substitutes an alternate version of a
    dependent-vowel (matra) glyph when it is preceded by <samp>"Ka,Halant,Tta"</samp>
    should still be applied if the dependent-vowel codepoint is preceded
    by <samp>"Ka,Halant,ZWJ,Tta"</samp> in the text run.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match <samp>"NBSP,ZWJ,Halant,_Consonant_"</samp>, <samp>"NBSP,_mark_"</samp>, or <samp>"NBSP,_matra_"</samp>.

In addition to general punctuation, runs of Kannada text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block.


## The `<knd2>` shaping model ##

Processing a run of `<knd2>` text involves six top-level stages:

1. Identifying syllables and other sequences
2. Initial reordering
3. Applying the basic substitution features from <abbr>GSUB</abbr>
4. Final reordering
5. Applying all remaining substitution features from <abbr>GSUB</abbr>
6. Applying all remaining positioning features from <abbr>GPOS</abbr>


As with other Indic scripts, the initial reordering stage and the
final reordering stage each involve applying a set of several
script-specific rules. The basic substitution features must be applied
to the run in a specific order. The remaining substitution features in
stage five, however, do not have a mandatory order.

Indic scripts follow many of the same shaping patterns, but they
differ in a few critical characteristics that the shaping engine must
track. These include:

  - The position of the base consonant in a syllable.
  
  - The final position of <samp>"Reph"</samp>.
  
  - Whether <samp>"Reph"</samp> must be requested explicitly or if it is formed by
    a specific, implicit sequence.
	
  - Whether the below-base forms feature is applied only to consonants
    before the syllable base, only to consonants after the base
    consonant, or to both.
	
  - The ordering positions for dependent vowels
    (matras). Specifically, right-side, above-base, and below-base
    matras follow different rules in different scripts. 
	All Indic scripts position left-side matras in the same
    manner, in the ordering position `POS_PREBASE_MATRA`. 

With regard to these common variations, Kannada's specific shaping
characteristics include:

  - `BASE_POS_LAST` = The base consonant of a syllable is the last
     consonant, not counting any consonants with post-base forms.
	 
	 - Kannada differs somewhat from other `BASE_POS_LAST` scripts in
       that all consonants can use post-base forms. Therefore, the
       general base-consonant search algorithm should identify the first
       non-<samp>"Reph"</samp> consonant as the base. This is the expected
       behavior, as it allows the same search algorithm to be used
       with all `BASE_POS_LAST` scripts.

  - `REPH_POS_AFTER_POST` = <samp>"Reph"</samp> is ordered after the last post-base
     consonant form.

  - `REPH_MODE_IMPLICIT` = <samp>"Reph"</samp> is formed by an initial <samp>"Ra,Halant"</samp> sequence.

  - `BLWF_MODE_POST_ONLY` = The below-forms feature is applied only to
     post-base consonants.

  - `MATRA_POS_TOP` = `POS_BEFORE_SUBJOINED`  = Above-base matras are
    ordered before any subjoined (i.e., below-base) consonant forms.

  - `MATRA_POS_RIGHT` = Kannada includes right-side matras that follow two
     different reordering rules. 
	 
	 - Matras <samp>"Sign Vocalic R"</samp> (`U+0CC3`), <samp>"Sign Vocalic Rr"</samp>
       (`U+0CC4`), <samp>"Sign Ee"</samp> (`U+0CC7`), <samp>"Sign Ai"</samp> (`U+0CC8`), <samp>"Sign
       O"</samp> (`U+0CCA`), <samp>"Sign Oo"</samp> (`U+0CCB`), <samp>"Length Mark"</samp> (`U+0CD5`),
       and <samp>"Ai Length Mark"</samp> (`U+0CD6`) use `POS_AFTER_SUBJOINED` =
       These right-side matras are ordered after all subjoined (i.e.,
       below-base) consonant forms. 
	   
	 - Matras <samp>"Sign Aa"</samp> (`U+0CBE`), <samp>"Sign Ii"</samp> (`U+0CC0`), <samp>"Sign U"</samp>
       (`U+0CC1`), and <samp>"Sign Uu"</samp> (`U+0CC2`) use
       `POS_BEFORE_SUBJOINED` = These right-side matras are ordered before
       all subjoined (i.e., below-base) consonant forms.

  - `MATRA_POS_BOTTOM` = `POS_BEFORE_SUBJOINED` = Below-base matras are
     ordered before any subjoined (i.e., below-base) consonant forms.

These characteristics determine how the shaping engine must reorder
certain glyphs, how base consonants are determined, and how <samp>"Reph"</samp>
should be encoded within a run of text.


### Stage 1: Identifying syllables and other sequences ###

A syllable in Kannada consists of a valid orthographic sequence
that may be followed by a "tail" of modifier signs. 

> Note: The Kannada Unicode block enumerates four modifier signs,
> "Candrabindu" (`U+0C81`), "Anusvara" (`U+0C82`), "Visarga" 
> (`U+0C83`), and "Avagraha" (`U+0CBD`). In addition, Sanskrit text
> written in Kannada may include additional signs from the Vedic
> Extensions block. 
>
> Note also that the "Spacing Candrabindu" (`U+0C80`) is a letter, not
> a modifier sign.

Each syllable contains exactly one vowel sound. Valid syllables may
begin with either a consonant or an independent vowel. 

If the syllable begins with a consonant, then the consonant that
provides the vowel sound is referred to as the "base" consonant. If
the syllable begins with an independent vowel, that independent vowel
is the syllable's only vowel sound and serves as the "base". 

> Note: A consonant that is not accompanied by a dependent vowel (matra) sign
> carries the script's inherent vowel sound. This vowel sound is changed
> by a dependent vowel (matra) sign following the consonant.

Kannada uses the `BASE_POS_LAST` characteristic mentioned
earlier. However, because all consonants in the script can potentially
take on post-base consonant forms, the outcome of the shaping
characteristic may be counterintuitive.

Generally speaking, the base consonant is the first consonant of the
syllable, which is rendered in full form, and any subsequent
consonants are rendered in special post-base forms. 

Each of these post-base consonants will be preceded by the <samp>"Halant"</samp> mark, which
indicates that they carry no vowel. They affect pronunciation by
combining with the base consonant (e.g., "_str_", "_pl_") but they
do not add a vowel sound.

As with other Indic scripts, the consonant <samp>"Ra"</samp> receives special
treatment; in many circumstances it is replaced by a combining
mark-like form. 

  - A <samp>"Ra,Halant"</samp> sequence at the beginning of a syllable is replaced
    with a right-side mark called <samp>"Reph"</samp> (unless the <samp>"Ra"</samp> is the only
    consonant in the syllable). This rule is synonymous with the
    `REPH_MODE_IMPLICIT` characteristic mentioned earlier.
  
<samp>"Reph"</samp> characters must be reordered after the syllable-identification
stage is complete.

> Note: Generally speaking, OpenType fonts will implement support for
> any below-base, post-base, and pre-base-reordering consonant forms
> by including the necessary substitution rules in their `blwf`,
> `pstf`, and `pref` lookups in <abbr title="Glyph Substitution table">GSUB</abbr>.
>
> Consequently, whenever shaping engines need to determine whether or 
> not a given consonant can take on such a special form, the most
> appropriate test is to check if the consonant is included in the
> relevant <abbr title="Glyph Substitution table">GSUB</abbr> lookup. Other implementations are possible, such as
> maintaining static tables of consonants, but checking for <abbr title="Glyph Substitution table">GSUB</abbr>
> support ensures that the expected behavior is implemented in the
> active font, and is therefore the most reliable approach.


In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.

> Note: Foreign loanwords, when written in the Kannada script, may
> not adhere to the syllable-formation rules described above. In
> particular, it is not uncommon to encounter foreign loanwords that
> contain a word-final suffix of consonants.
>
> Nevertheless, such word-final suffixes will be correctly matched by
> the regular expressions listed below. These loanwords are pronounced
> differently, which raises issues for potential readers, but the
> character sequences do not affect the shaping process.


Syllables should be identified by examining the run and matching
glyphs, based on their categorization, using regular expressions. 

The following general-purpose Indic-shaping regular expressions can be
used to match Kannada syllables. 

The regular expressions utilize the shaping classes from the tables
above. For the purpose of syllable identification, more general
classes can be used, as defined in the following table. This
simplifies the resulting expressions. 

```markdown
_ra_		= The consonant "Ra" 
_consonant_	= ( `CONSONANT` | `CONSONANT_DEAD` ) - _ra_
_vowel_		= `VOWEL_INDEPENDENT`
_nukta_	  	= `NUKTA`
_halant_	= `VIRAMA`
_zwj_		= `JOINER`
_zwnj_		= `NON_JOINER`
_matra_		= `VOWEL_DEPENDENT` | `PURE_KILLER`
_syllablemodifier_	= `SYLLABLE_MODIFIER` | `BINDU` | `VISARGA` | `GEMINATION_MARK`
_vedicsign_	= `CANTILLATION`
_placeholder_	= `PLACEHOLDER` | `CONSONANT_PLACEHOLDER` | `NUMBER`
_dottedcircle_	= `DOTTED_CIRCLE`
_repha_		= `CONSONANT_PRE_REPHA`
_consonantmedial_	= `CONSONANT_MEDIAL`
_symbol_	= `SYMBOL` | `AVAGRAHA`
_consonantwithstacker_	= `CONSONANT_WITH_STACKER`
_other_		= `OTHER` | `MODIFYING_LETTER`
```


> Note: the _ra_ identification class is mutually exclusive with 
> the _consonant_ class. The union of the _consonant_ and _ra_ classes
> is used in the regular expression elements below in order to
> correctly identify <samp>"Ra"</samp> characters that do not trigger <samp>"Reph"</samp> or
> <samp>"Rakaar"</samp> shaping behavior.
>
> Note, also, that the cantillation mark "combining Ra" in the
> Devanagari Extended block does _not_ belong to the _ra_
> identification class, and that the other "combining consonant"
> cantillation marks in the Devanagari Extended block do not belong to
> the _consonant_ identification class.

> Note: The _placeholder_ identification class includes codepoints
> that are often used in place of vowels or consonants when a document
> needs to display a matra, mark, or special form in isolation or
> in another context beyond a standard syllable. Examples of
> _placeholder_ codepoints include hyphens and non-breaking
> spaces. Sequences that utilize this approach should be identified as
> "standalone" syllables.
>
> The _placeholder_ identification class also includes numerals, which
> are commonly used as word substitutes within normal text. Examples
> include ordinals (e.g., "4th").

> Note: The _other_ identification class includes codepoints that
> do not interact with adjacent characters for shaping purposes. Even
> though some of these codepoints (such as `MODIFYING_LETTER`) can
> occur within words, they evoke no behavior from the shaping
> engine and do not factor into the regular expressions that
> follow. Therefore, the shaping engine may choose to ignore them
> during syllable identification; they are listed here for completeness.

These identification classes form the bases of the following regular
expression elements:

```markdown
C	= _consonant_ | _ra_
Z	= _zwj_ | _zwnj_
REPH	= (_ra_ _halant_) | _repha_
CN		= C _zwj_? _nukta_?
FORCED_RAKAR	= _zwj_ _halant_ _zwj_ _ra_
S	= _symbol_ _nukta_?
MATRA_GROUP	= Z{0,3} _matra_ _nukta_? (_halant_ | FORCED_RAKAR)?
SYLLABLE_TAIL	= (Z? _syllablemodifier_ _syllablemodifier_? _zwnj_?)? _vedicsign_{0,3}
HALANT_GROUP	= Z? _halant_ (_zwj_ _nukta_?)?
FINAL_HALANT_GROUP	= HALANT_GROUP | (_halant_ _zwnj_)
MEDIAL_GROUP	= _consonantmedial_?
HALANT_OR_MATRA_GROUP	= (FINAL_HALANT_GROUP | MATRA_GROUP*)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(MATRA_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(MATRA_GROUP){0,4}` .


Using the above elements, the following regular expressions define the
possible syllable types:

A consonant-based syllable will match the expression:
```markdown
(_repha_|_consonantwithstacker_)? (CN HALANT_GROUP)* CN MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(CN HALANT_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(CN HALANT_GROUP){0,4}` .

A vowel-based syllable will match the expression:
```markdown
REPH? _vowel_ _nukta_? (_zwj_ | (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

A standalone syllable will match the expression:
```markdown
((_repha_|_consonantwithstacker_)? _placeholder_ | REPH? _dottedcircle_) _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

> Note: Although they are labeled as "standalone syllables" here,
> many sequences that match the standalone regular expression above
> are instances where a document needs to display a matra, combining
> mark, or special form in isolation. Such sequences might not have
> any significance with regard to the definition of syllables used in
> the language or orthography of the text.

A symbol-based syllable will match the expression:
```markdown
S SYLLABLE_TAIL
```

A broken syllable will match the expression:
```markdown
REPH? _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .


The primary problem involved in shaping broken syllables is the lack
of a syllable base (either a base consonant or an independent
vowel). Without a syllable base, the shaping engine cannot perform
<abbr title="Glyph Positioning table">GPOS</abbr> positioning and other contextual operations that are required
later in the shaping process.

To make up for this limitation, shaping engines should insert a
dotted-circle placeholder (`U+25CC`) character into the text stream
where the missing syllable base was expected to occur. This
placeholder allows the shaping process to proceed on a best-effort
basis at handling the broken-syllable sequence, but making guarantees
about the orthographic correctness or preferred appearance of the
final result is out of scope for this document.

Shaping engines can perform this dotted-circle insertion at any point
after the broken syllable has been recognized and before <abbr title="Glyph Substitution table">GSUB</abbr> features
are applied. However, the best results will likely be attained by
performing the insertion immediately, before proceeding to
stage 2. This will enable the maximum number of <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features
in the active font to be correctly applied to the text run by ensuring
that all reordering, tagging, and sorting algorithms are executed as
usual.

> Note: In software stacks where other text-handling operations, such
> as Unicode normalization and localization, are performed before the
> text run is passed to the shaping engine, there is a potential for
> the dotted-circle insertion to cause unexpected effects.
>
> For example, if a `ccmp` or `locl` feature substitutes the default
> dotted-circle placeholder glyph with a variant glyph of a different
> size or weight for the (`U+25CC`) codepoint, then any shaping engine
> which relies on another software component to handle that
> functionality must take additional care to ensure consistency.


The expressions above use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


After the syllables have been identified, each of the subsequent 
shaping stages occurs on a per-syllable basis.

### Stage 2: Initial reordering ###

The initial reordering stage is used to relocate glyphs from the
phonetic order in which they occur in a run of text to the
orthographic order in which they are presented visually.

> Note: Primarily, this means moving dependent-vowel (matra) glyphs, 
> <samp>"Ra,Halant"</samp> glyph sequences, and other consonants that take special
> treatment in some circumstances. 
>
> These reordering moves are mandatory. The final-reordering stage
> may make additional moves, depending on the text and on the features
> implemented in the active font.

The syllable should be processed by tagging each glyph with its
intended position based on its ordering category. After all glyphs
have been tagged, the entire syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.

The final sort order of the ordering categories should be:


	POS_RA_TO_BECOME_REPH
	POS_PREBASE_MATRA
	POS_PREBASE_CONSONANT

	POS_SYLLABLE_BASE
	POS_AFTER_MAIN

	POS_ABOVEBASE_CONSONANT

	POS_BEFORE_SUBJOINED
	POS_BELOWBASE_CONSONANT
	POS_AFTER_SUBJOINED

	POS_BEFORE_POST
	POS_POSTBASE_CONSONANT
	POS_AFTER_POST

	POS_FINAL_CONSONANT
	POS_SMVD


This sort order enumerates all of the possible final positions to
which a codepoint might be reordered, across all of the Indic
scripts. It includes some ordering categories not utilized in
Kannada. 

The basic positions (left to right) are <samp>"Reph"</samp>
(`POS_RA_TO_BECOME_REPH`), dependent vowels (matras) and consonants
positioned before the base consonant or syllable base
(`POS_PREBASE_MATRA` and `POS_PREBASE_CONSONANT`), the base consonant
or syllable base (`POS_SYLLABLE_BASE`), above-base consonants
(`POS_ABOVEBASE_CONSONANT`), below-base consonants
(`POS_BELOWBASE_CONSONANT`), consonants positioned after the base
consonant or syllable base (`POS_POSTBASE_CONSONANT`), syllable-final
consonants (`POS_FINAL_CONSONANT`), and syllable-modifying or Vedic
signs (`POS_SMVD`).

In addition, several secondary positions are defined to handle various
reordering rules that deal with relative, rather than absolute,
positioning. `POS_AFTER_MAIN` means that a character must be
positioned immediately after the syllable base. `POS_BEFORE_SUBJOINED`
and `POS_AFTER_SUBJOINED` mean that a character must be positioned
before or after any below-base consonants, respectively. Similarly,
`POS_BEFORE_POST` and `POS_AFTER_POST` mean that a character must be
positioned before or after any post-base consonants, respectively. 

For shaping-engine implementers, the names used for the ordering
categories matter only in that they are unambiguous. 

For a definition of the "base" consonant, refer to stage 2, step 1, which
follows.

#### Stage 2, step 1: Base consonant ####

The first step is to determine the base consonant of the syllable, if
there is one, and tag it as `POS_SYLLABLE_BASE`.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base, and it should be tagged
as `POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a standalone sequence or other syllable that begins with a placeholder
or dotted circle, the placeholder or dotted circle will always serve
as the syllable base, and it should be tagged as
`POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a syllable that begins with a consonant, the shaping engine must
determine the base consonant by a script-specific algorithm.

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

The base consonant is defined as the consonant in a consonant-based
syllable that carries the syllable's vowel sound. That vowel sound
will either be provided by the script's inherent vowel (in which case
it is not written with a separate character) or the sound will be designated
by the addition of a dependent-vowel (matra) sign.


<!--- > Because vowel-based syllables will not include consonants and
> because independent vowels do not take on special forms or require
> reordering, many of the steps that follow will involve no
> work for a vowel-based syllable. However, vowel-based syllables must
> still be sorted and their marks handled correctly, and <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr>
> lookups must be applied. These steps of the shaping process follow
> the same rules that are employed for consonant-based syllables.
--->

While performing the base-consonant search, shaping engines may
also encounter special-form consonants, including below-base
consonants and post-base consonants. Each of these special-form
consonants must also be tagged (`POS_BELOWBASE_CONSONANT`,
`POS_POSTBASE_CONSONANT`, respectively). 

Any pre-base-reordering consonant (such as a pre-base-reordering <samp>"Ra"</samp>)
encountered during the base-consonant search must be tagged
`POS_POSTBASE_CONSONANT`. 
 
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.


The algorithm for determining the base consonant is:

  - If the syllable starts with <samp>"Ra,Halant"</samp> and the syllable contains
    more than one consonant, exclude the starting <samp>"Ra"</samp> from the list of
    consonants to be considered. 
  - Starting from the end of the syllable, move backwards until a consonant is found.
      * If the consonant is the first consonant, stop.
      * If the consonant is preceded by the sequence <samp>"Halant,ZWJ"</samp>, stop.
      * If the consonant has a below-base form, tag it as
        `POS_BELOWBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant has a post-base form, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant is a pre-base-reordering <samp>"Ra"</samp>, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If none of the above conditions is true, stop.
  - The consonant stopped at will be the base consonant.


> Note: The algorithm is designed to work for all Indic
> scripts. However, Kannada does not utilize pre-base-reordering <samp>"Ra"</samp>.
>
> Also, it is important to note that all consonants in Kannada have a
> post-base form, therefore the backwards-search step will
> automatically move past them until it reaches either a <samp>"Ra,Halant"</samp>
> sequence or the first consonant. However, this condition is not the
> same as the shaping characteristic `BASE_POS_FIRST`, which does not
> use the above search algorithm at all.

> Note: Because Kannada employs the `BLWF_MODE_POST_ONLY` shaping
> characteristic, consonants with below-base special forms will occur
> only after the base consonant or syllable base. 
> 
> During the base-consonant search, therefore, all of these below-base
> form sequences will be encountered and tagged correctly as
> <samp>"Halant,_consonant_"</samp> patterns. Stage 2, step 5 below exists to ensure that
> the <samp>"_consonant_,Halant"</samp> pattern preceding the base consonant or syllable base
> for below-base forms in other Indic scripts will also be tagged correctly.

#### Stage 2, step 2: Matra decomposition ####

Second, any multi-part dependent vowels (matras) must be decomposed
into their independent components. Kannada has five
multi-part dependent vowels, "Ii" (`U+0CC0`), "Ee" (`U+0CC7`), "Ai"
(`U+0CC8`), "O" (`U+0CCA`), and "Oo" (`U+0CCB`). Each
has a canonical decomposition, so this step is unambiguous. 

> "Ii" (`U+0CC0`) decomposes to "`U+0CBF`,`U+0CD5`"
>
> "Ee" (`U+0CC7`) decomposes to "`U+0CC6`,`U+0CD5`"
>
> "Ai" (`U+0CC8`) decomposes to "`U+0CC6`,`U+0CD6`"
>
> "O" (`U+0CCA`) decomposes to "`U+0CC6`,`U+0CC2`"
>
> "Oo" (`U+0CCB`) decomposes to "`U+0CCA`,`U+0CD5`"
>

Because this decomposition is a character-level operation, the shaping
engine may choose to perform it earlier, such as during an initial
Unicode-normalization stage. However, all such decompositions must be
completed before the shaping engine begins step three, below.

> Note: The decomposition of "Oo" (`U+0CCB`) is atypical; Unicode
> specifies that the codepoint decomposes to "O" (`U+0CCA`) followed
> by `U+0CD5`; the "O" codepoint is then decomposed to
> "`U+0CC6`,`U+0CC2`". Shaping engines must take care not to miss this
> second decomposition.


:::{figure-md}
![Multi-part matra decomposition](/images/kannada/kannada-matra-decomposition.svg "Multi-part matra decomposition"){.shaping-demo .inline-svg .greyscale-svg #kannada-matra-decomposition}

Multi-part matra decomposition
:::

```{svg-color-toggle-button} kannada-matra-decomposition
```


#### Stage 2, step 3: Tag matras ####

Third, all dependent-vowel (matra) signs, including those that
resulted from the preceding decomposition step, must be tagged to be
moved to the correct position in the syllable.

Left-side matras should be tagged with `POS_PREBASE_MATRA`.

Above-base matras should be tagged with `POS_BEFORE_SUBJOINED`.

Right-side matras should be tagged according to two rules.

  - Matras <samp>"Sign Vocalic R"</samp> (`U+0CC3`), <samp>"Sign Vocalic Rr"</samp>
       (`U+0CC4`), <samp>"Sign Ee"</samp> (`U+0CC7`), <samp>"Sign Ai"</samp> (`U+0CC8`), <samp>"Sign
       O"</samp> (`U+0CCA`), <samp>"Sign Oo"</samp> (`U+0CCB`), <samp>"Length Mark"</samp> (`U+0CD5`),
       and <samp>"Ai Length Mark"</samp> (`U+0CD6`) should be tagged with
       `POS_AFTER_SUBJOINED`.
	   
  - Matras <samp>"Sign Aa"</samp> (`U+0CBE`), <samp>"Sign Ii"</samp> (`U+0CC0`), <samp>"Sign U"</samp>
       (`U+0CC1`), and <samp>"Sign Uu"</samp> (`U+0CC2`) use
       `POS_BEFORE_SUBJOINED`.

> Note: the right-side matras <samp>"Sign Ii"</samp> (`U+0CC0`), <samp>"Sign Ee"</samp>
> (`U+0CC7`), <samp>"Sign Ai"</samp> (`U+0CC8`), <samp>"Sign O"</samp> (`U+0CCA`), and
> <samp>"Sign Oo"</samp> (`U+0CCB`) are multi-part matras and were decomposed into
> independent components during stage 2, step 2. They are listed here only to ensure that the
> two position-tagging rules used in Kannada are described completely.

Below-base matras should be tagged with `POS_BEFORE_SUBJOINED`.

For simplicity, shaping engines may choose to tag single-part matras
in an earlier text-processing step, using the information in the
_Mark-placement subclass_ column of the character tables. It is
critical at this step, however, that all decomposed matras are also
correctly tagged before proceeding to the next step.

#### Stage 2, step 4: Adjacent marks ####

Fourth, any subsequences of marks that include a <samp>"Nukta"</samp> and a
<samp>"Halant"</samp> or Vedic sign must be reordered so that the <samp>"Nukta"</samp> appears
first.

This means that the subsequence <samp>"Halant,Nukta"</samp> is reordered to
<samp>"Nukta,Halant"</samp> and that the subsequence <samp>"_Vedic_sign_,Nukta"</samp> is
reordered to <samp>"Nukta,_Vedic_sign_"</samp>.

For subsequences of affected marks that are longer than two, the
reordering operation must be repeated until the <samp>"Nukta"</samp> is the first
character in the subsequence. No other marks in the subsequence
should be reordered.

This order is canonical in Unicode and is required so that
<samp>"_consonant_,Nukta"</samp> substitution rules from <abbr title="Glyph Substitution table">GSUB</abbr> will be correctly
matched later in the shaping process.

#### Stage 2, step 5: Pre-base consonants ####

Fifth, consonants that occur before the syllable base must be tagged
with `POS_PREBASE_CONSONANT`. Excluding initial <samp>"Ra,Halant"</samp> sequences
that will become <samp>"Reph"</samp>s: 

  - If the consonant has a below-base form, tag it as
          `POS_BELOWBASE_CONSONANT`. 
  - Otherwise, tag it as `POS_PREBASE_CONSONANT`.
  
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.

Kannada does not use any pre-base consonants; this step is listed here
because it is part of the general processing scheme for shaping Indic scripts.

> Note: Because Kannada employs the `BLWF_MODE_POST_ONLY` shaping
> characteristic, consonants with below-base special forms will occur
> only after the base consonant or syllable base. 
> 
> During the base-consonant search in stage 2, step 1, therefore, all of these below-base
> form sequences will be encountered and tagged correctly as
> <samp>"Halant,_consonant_"</samp> patterns. The tagging in this step ensures that
> the <samp>"_consonant_,Halant"</samp> pattern preceding the base consonant or syllable base
> for below-base forms in other Indic scripts will also be tagged correctly.

#### Stage 2, step 6: Reph ####

Sixth, initial <samp>"Ra,Halant"</samp> sequences that will become <samp>"Reph"</samp>s must be tagged with
`POS_RA_TO_BECOME_REPH`.

> Note: an initial <samp>"Ra,Halant"</samp> sequence will always become a <samp>"Reph"</samp>
> unless the <samp>"Ra"</samp> is the only consonant in the syllable.

#### Stage 2, step 7: Final consonants ####

Seventh, all final consonants must be tagged. Consonants that occur
after the syllable base _and_ after a dependent vowel (matra) sign
must be tagged with  `POS_FINAL_CONSONANT`.

> Note: Final consonants occur only in Sinhala and should not be
> expected in `<knd2>` text runs. This step is included here to
> maintain compatibility across Indic scripts.


#### Stage 2, step 8: Mark tagging ####

Eighth, all marks must be tagged. 

> Note: In this step, joiner and non-joiner characters must also be
> tagged according to the same rules given for marks, even though
> these characters are not categorized as marks in Unicode.

Marks in the `BINDU`, `VISARGA`, `AVAGRAHA`, `CANTILLATION`,
`SYLLABLE_MODIFIER`, `GEMINATION_MARK`, and `SYMBOL` categories should
be tagged with `POS_SMVD`. 

All <samp>"Nukta"</samp>s must be tagged with the same positioning tag as the
preceding consonant, independent vowel, placeholder, or dotted circle.

All remaining marks (not in the `POS_SMVD` category and not <samp>"Nukta"</samp>s)
must be tagged with the same positioning tag as the closest non-mark
character the mark has affinity with, so that they move together 
during the sorting step.

There are two possible cases: those marks before the syllable base
and those marks after the syllable base. In addition, an exception is
made for <samp>"Halant"</samp> marks that follow a left-side (pre-base) matra.

  1. Initially, all remaining marks should be tagged with the same
	 positioning tag as the closest preceding consonant.

  2. For each consonant after the syllable base (such as post-base
	 consonants, below-base consonants, or final consonants), all
	 remaining marks located between that current consonant and any
	 previous consonant should be tagged with the same positioning tag as
	 the current (later) consonant.
  
     In other words, all consonants preceding the syllable base "own" the
	 marks that follow them, while all consonants after the syllable base
	 "own" the marks that come before them. When a syllable does not have
	 any consonants after the syllable base, the syllable base should
	 "own" all the marks that follow it.
  
  3. Finally, <samp>"Halant"</samp> marks that follow a left-side dependent vowel
     (matra) should _not_ be tagged with the left-side matra's
     positioning tag. Instead, the <samp>"Halant"</samp> should be tagged with the
     positioning tag of the non-mark character preceding the left-side
     matra. This prevents the <samp>"Halant"</samp> mark from being moved with the
     left-side matra when the syllable is sorted.


<!--- HarfBuzz also tags everything between a post-base consonant or -->
<!--matra and another post-base consonant as belonging to the latter -->
<!--post-base consonant. --->


#### Stage 2, step 9: Sort syllable ####

With these steps completed, the syllable can be sorted into the final
sort order as listed at the beginning of stage 2.

The glyphs in the syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.


#### Stage 2, step 10: Flag sequences for possible feature applications ####

With the initial reordering complete, those glyphs in the syllable that
may have <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features applied in stages 3, 5, and 6 should be
flagged for each potential feature. 

This flagging is preliminary; the set of potential features varies
between different scripts and which features are supported varies
between fonts. It is also possible that the application of
one feature on a glyph sequence will perform a substitution that makes
a later feature no longer applicable to the updated sequence.

Consequently, the flagging must be completed before shaping proceeds
to the stages during which features are applied.

Some shaping features, such as `locl`, can potentially apply to any
glyphs. Therefore it is not necessary to maintain a separate flag for
these features in the bitmask (or other data structure) used to track
the flags -- although shaping engines may do so if desired.

The sequences to flag are summarized in the list below; a full
description of each feature's function and interpretation is provided
in the <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> application stages that follow.

  - `nukt` should match <samp>"_Consonant_,Nukta"</samp> sequences
  - `akhn` should match <samp>"Ka,Halant,Ssa"</samp> and <samp>"Ja,Halant,Nya"</samp>
  - `rphf` should match initial <samp>"Ra,Halant"</samp> sequences but _not_ match
            initial <samp>"Ra,Halant,ZWJ"</samp> sequences
  - `pref` should match <samp>"_Consonant_,Ra"</samp> in post-base positions
  - `blwf` should match <samp>"Halant,_Consonant_"</samp> in post-base positions
  - `half` should match <samp>"_Consonant_,Halant"</samp> in pre-base position but
           _not_ match <samp>"Ra,Halant"</samp> sequences flagged for `rphf` and
           _not_ match <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequences
  - `pstf` should match <samp>"Halant,_Consonant_"</samp> in post-base position
  - `cjct` should match <samp>"_Consonant_,Halant,_Consonant_"</samp> but _not_
            match <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
            <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp>


### Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should be flagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2, step 10.

The order in which these substitutions must be performed is fixed for
all Indic scripts:

	locl
	nukt
	akhn
	rphf 
	rkrf (not used in Kannada)
	pref
	blwf 
	abvf (not used in Kannada)
	half
	pstf
	vatu (not used in Kannada)
	cjct
	cfar (not used in Kannada)

#### Stage 3, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

#### Stage 3, step 2: nukt ####

The `nukt` feature replaces <samp>"_Consonant_,Nukta"</samp> sequences with a
precomposed nukta-variant of the consonant glyph. 

  - The context defined for a `nukt` feature is:

:::{table} `nukt` feature context
    
| Backtrack     | Matching sequence             | Lookahead     |
|:--------------|:------------------------------|:--------------|
| _none_        | `_consonant_`(full),`_nukta_` | _none_        |
:::


:::{figure-md}
![Nukta composition](/images/kannada/kannada-nukt.svg "Nukta composition"){.shaping-demo .inline-svg .greyscale-svg #kannada-nukt}

Nukta composition
:::

```{svg-color-toggle-button} kannada-nukt
```


#### Stage 3, step 3: akhn ####

The `akhn` feature replaces two specific sequences with required ligatures. 

  - <samp>"Ka,Halant,Ssa"</samp> is substituted with the <samp>"KSsa"</samp> ligature. 
  - <samp>"Ja,Halant,Nya"</samp> is substituted with the <samp>"JNya"</samp> ligature. 
  
These sequences can occur anywhere in a syllable. The <samp>"KSsa"</samp> and
<samp>"JNya"</samp> characters have orthographic status equivalent to full
consonants in some languages, and fonts may have `cjct` substitution
rules designed to match them in subsequences. Therefore, this
feature must be applied before all other many-to-one substitutions.

  - The context defined for an `akhn` feature is:

:::{table} `akhn` feature context
    
| Backtrack     | Matching sequence           | Lookahead     |
|:--------------|:----------------------------|:--------------|
| _none_        | `AKHAND_CONSONANT_SEQUENCE` | _none_        |
:::


:::{figure-md}
![KSsa ligation](/images/kannada/kannada-akhn-kssa.svg "KSsa ligation"){.shaping-demo .inline-svg .greyscale-svg #kannada-akhn-kssa}

KSsa ligation
:::

```{svg-color-toggle-button} kannada-akhn-kssa
```


:::{figure-md}
![JNya ligation](/images/kannada/kannada-akhn-jnya.svg "JNya ligation"){.shaping-demo .inline-svg .greyscale-svg #kannada-akhn-jnya}

JNya ligation
:::

```{svg-color-toggle-button} kannada-akhn-jnya
```


#### Stage 3, step 4: rphf ####

The `rphf` feature replaces initial <samp>"Ra,Halant"</samp> sequences with the
<samp>"Reph"</samp> glyph.

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence, however, must not be flagged for
    the `rphf` substitution.
	

  - The context defined for a `rphf` feature is:

:::{table} `rphf` feature context
    
| Backtrack        | Matching sequence       | Lookahead     |
|:-----------------|:------------------------|:--------------|
| `SYLLABLE_START` | "Ra"(full),`_halant_`   | _none_        |
:::


:::{figure-md}
![Reph composition](/images/kannada/kannada-rphf.svg "Reph composition"){.shaping-demo .inline-svg .greyscale-svg #kannada-rphf}

Reph composition
:::

```{svg-color-toggle-button} kannada-rphf
```


#### Stage 3, step 5: rkrf ####

> This feature is not used in Kannada.

#### Stage 3, step 6: pref ####

The `pref` feature replaces pre-base-reordering consonant glyphs with
any special forms.

The substitution of the nominal glyph for its special form takes place
at this stage. However, the actual reordering move is performed later,
in stage 4, step 4.

> Note: Kannada does not usually incorporate pre-base-reordering
> consonant forms, but it is possible for a font to implement them in
> order to provide for desired typographic variation.

#### Stage 3, step 7: blwf ####

The `blwf` feature replaces below-base-consonant glyphs with any
special forms. All consonants in Kannada can take on a below-base consonant
form.


:::{figure-md}
![Below-base form composition](/images/kannada/kannada-blwf.svg "Below-base form composition"){.shaping-demo .inline-svg .greyscale-svg #kannada-blwf}

Below-base form composition
:::

```{svg-color-toggle-button} kannada-blwf
```


#### Stage 3, step 8: abvf ####

> This feature is not used in Kannada.

#### Stage 3, step 9: half ####

The `half` feature replaces <samp>"_Consonant_,Halant"</samp> sequences before the
base consonant or syllable base with "half forms" of the consonant
glyphs.

In the most common case, this substitution applies to
<samp>"_Consonant_,Halant"</samp> sequences that are followed by another
<samp>"_Consonant_"</samp>.

In addition, a sequence matching <samp>"_Consonant_,Halant,ZWJ"</samp> must also be
flagged for potential `half` substitutions.

> Note: The presence of the <samp>"ZWJ"</samp> at the end of the sequence means
> that the sequence may match the regular-expression test in stage 1
> as the end of a syllable, even without being followed by a base
> consonant or syllable base.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.

There are two exceptions to the default behavior, for which the
shaping engine must test:

  - Initial <samp>"Ra,Halant"</samp> sequences, which should have been flagged for
    the `rphf` feature earlier, must not be flagged for potential
    `half` substitutions.

  - A sequence matching <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be
    flagged for potential `half` substitutions.

> Note: Kannada does not usually incorporate half forms, but it is
> possible for a font to implement them in order to provide for
> desired typographic variation.


#### Stage 3, step 10: pstf ####

The `pstf` feature replaces post-base-consonant glyphs with any special forms.


#### Stage 3, step 11: vatu ####

> This feature is not used in Kannada.

#### Stage 3, step 12: cjct ####

The `cjct` feature replaces sequences of adjacent consonants with
conjunct ligatures. These sequences must match <samp>"_Consonant_,Halant,_Consonant_"</samp>.

A sequence matching <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
<samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be flagged to form a conjunct.

> Note: The presence of the <samp>"ZWJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> sequence should automatically
> inhibit any `cjct` feature rules from matching the sequence as valid
> input, and thus prevent the `cjct` substitution from being applied.

> Note: The presence of the <samp>"ZWNJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequence means that the
> <samp>"_Consonant_,Halant,ZWNJ"</samp> subsequence will match the
> regular-expression test in stage 1 as the end of a syllable.
> 
> Because OpenType shaping features in `<knd2>` are defined as
> applying only within an individual syllable, this means that the
> presence of the <samp>"ZWNJ"</samp> will automatically prevent the application of
> a `cjct` feature by triggering the identification of a syllable
> break between the two consonants.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWNJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.
>
> Note, also: The presence of the <samp>"ZWJ"</samp> means that a
> <samp>"_Consonant_,Halant,ZWJ"</samp> sequence may match the regular-expression
> test in stage 1 as the end of a syllable, even without being
> followed by a base consonant or syllable base. By definition,
> however, a <samp>"_Consonant_,Halant,ZWJ"</samp> syllable identified in stage 1
> cannot also include a <samp>"_Consonant_"</samp> after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.

The font's <abbr title="Glyph Substitution table">GSUB</abbr> rules might be implemented so that `cjct`
substitutions apply to half-form consonants; therefore, this feature
must be applied after the `half` feature. 

> Note: Kannada does not usually incorporate conjuncts, but it is
> possible for a font to implement the `cjct` feature in order to
> provide for desired typographic variation.


#### Stage 3, step 13: cfar ####

> This feature is not used in Kannada.


### Stage 4: Final reordering ###

The final reordering stage repositions marks, dependent-vowel (matra)
signs, and <samp>"Reph"</samp> glyphs to the appropriate location with respect to
the base consonant or syllable base. Because multiple substitutions
may have occurred during the application of the basic-shaping features
in the preceding stage, these repositioning moves could not be
performed during the initial reordering stage.

Like the initial reordering stage, the steps involved in this stage
occur on a per-syllable basis.

<!--- Check that classifications have not been mangled. If the -->
<!--character is a Halant AND a ligature was formed AND a multiple
substitution was performed, restore the classification to VIRAMA
because it was almost certainly lost in the preceding <abbr title="Glyph Substitution table">GSUB</abbr> stage.
--->

#### Stage 4, step 1: Base consonant ####

The final reordering stage, like the initial reordering stage, begins
with determining the syllable base of each syllable, following the
same algorithm used in stage 2, step 1.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base. In a standalone sequence or
other syllable that begins with a placeholder or a dotted circle, the
placeholder or dotted circle will always serve as the syllable base.

In a syllable that begins with a consonant, the shaping engine must
repeat the base-consonant search algorithm used in stage 2, step 1.

The codepoint of the underlying base consonant or syllable base will
not change between the search performed in stage 2, step 1, and the
search repeated here. However, the application of <abbr title="Glyph Substitution table">GSUB</abbr> shaping
features in stage 3 means that several ligation and many-to-one
substitutions may have taken place. The final glyph produced by that
process may, therefore, be a conjunct or ligature form — in most
cases, such a glyph will not have an assigned Unicode codepoint.
   
#### Stage 4, step 2: Pre-base matras ####

Pre-base dependent vowels (matras) that were reordered during the
initial reordering stage must be moved to their final position. This
position is defined as:
   
   - after the last standalone <samp>"Halant"</samp> glyph that comes after the
     matra's starting position and also comes before the main
     consonant.
   - If a zero-width joiner follows this last standalone <samp>"Halant"</samp>, the
     final matra position is moved to after the joiner.

This means that the matra will move to the right of all explicit
<samp>"consonant,Halant"</samp> subsequences, but will stop to the left of the base
consonant or syllable base, all conjuncts or ligatures that contain
the base consonant or syllable base, and all half forms.

Kannada does not use pre-base matras, so this step will
involve no work when processing `<knd2>` text. It is included here in
order to maintain compatibility with the other Indic scripts.

> Note: OpenType and Unicode both state that if the syllable includes
> a <abbr title="Zero-Width Joiner">ZWJ</abbr> immediately after the last <samp>"Halant"</samp>, then the final matra
> position should be after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.
>
> However, there are several test sequences indicating that
> Microsoft's Uniscribe shaping engine did not follow this rule (in,
> at least, Devanagari and Bengali text), and in these circumstances
> Uniscribe instead makes the final matra position before the final
> <samp>"Consonant,Halant,ZWJ"</samp>.
>
> Subsequently, the HarfBuzz shaping engine has also followed the same
> pattern. If other shaping engine implementations prefer to maintain
> maximum compatibility with Uniscribe and HarfBuzz, then they should
> also follow suit.

> Note: The Microsoft script-development specifications for OpenType
> shaping also state that if a zero-width non-joiner follows the last
> standalone <samp>"Halant"</samp>, the final matra position is moved to after the
> non-joiner. However, it is unnecessary to test for this condition,
> because a <samp>"Halant,ZWNJ"</samp> subsequence is, by definition, the end of a
> syllable. Consequently, a <samp>"Halant,ZWNJ"</samp> cannot be followed by a
> pre-base dependent vowel.


#### Stage 4, step 3: Reph ####

<samp>"Reph"</samp> must be moved from the beginning of the syllable to its final
position. Because Kannada incorporates the `REPH_POS_AFTER_POST`
shaping characteristic, this final position is defined to be
immediately after any post-base consonant forms.


The algorithm for finding the final <samp>"Reph"</samp> position is:

  - Move the <samp>"Reph"</samp> to the position immediately before
    the first post-base matra, syllable modifier, or Vedic sign that
    has a positioning tag after the script's <samp>"Reph"</samp> position in the
    syllable sort order (as listed in [stage
    2](#stage-2-initial-reordering)). This will be the final <samp>"Reph"</samp>
    position. 
	> Note: Because Kannada incorporates the
    > `REPH_POS_AFTER_POST` shaping characteristic, this means
    > any positioning tag of `POS_FINAL_CONSONANT` or later,
    > although a post-base matra, syllable modifier, or Vedic sign
    > would not typically be tagged with `POS_FINAL_CONSONANT`.
  - If no other location has been located in the previous step, move
    the <samp>"Reph"</samp> to the end of the syllable.


Finally, if the final position of <samp>"Reph"</samp> occurs after a
<samp>"_matra_,Halant"</samp> subsequence, then <samp>"Reph"</samp> must be repositioned to the
left of <samp>"Halant"</samp>, to allow for potential matching with `abvs` or
`psts` substitutions from <abbr title="Glyph Substitution table">GSUB</abbr>.

:::{figure-md}
![Reph positioning](/images/kannada/kannada-reph-position.svg "Reph positioning"){.shaping-demo .inline-svg .greyscale-svg #kannada-reph-position}

Reph positioning
:::

```{svg-color-toggle-button} kannada-reph-position
```


#### Stage 4, step 4: Pre-base-reordering consonants ####

Any pre-base-reordering consonants must be moved to immediately before
the base consonant or syllable base.
  
Kannada does not use pre-base-reordering consonants, so this step will
involve no work when processing `<knd2>` text. It is included here in order
to maintain compatibility with the other Indic scripts.


#### Stage 4, step 5: Initial matras ####

Any left-side dependent vowels (matras) that are at the start of a
word must be flagged for potential substitution by the `init` feature
of <abbr title="Glyph Substitution table">GSUB</abbr>.

Kannada does not use the `init` feature, so this step will
involve no work when processing `<knd2>` text. It is included here in
order to maintain compatibility with the other Indic scripts.


### Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr> ###

In this stage, the remaining substitution features from the <abbr title="Glyph Substitution table">GSUB</abbr> table
are applied. In preparation for this stage, glyph sequences should be
flagged for possible application of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2,
step 10.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	init (not used in Kannada)
	pres
	abvs
	blws
	psts
	haln

The `init` feature is not used in Kannada.

The `pres` feature replaces pre-base-consonant glyphs with special
presentation forms. This can include consonant conjuncts, half-form
consonants, and stylistic variants of left-side dependent vowels
(matras). 

:::{figure-md}
![Pre-base form composition](/images/kannada/kannada-pres.svg "Pre-base form composition"){.shaping-demo .inline-svg .greyscale-svg #kannada-pres}

Pre-base form composition
:::

```{svg-color-toggle-button} kannada-pres
```


The `abvs` feature replaces above-base-consonant glyphs with special
presentation forms. This usually includes contextual variants of
above-base marks or contextually appropriate mark-and-base ligatures.

:::{figure-md}
![Above-base form composition](/images/kannada/kannada-abvs.svg "Above-base form composition"){.shaping-demo .inline-svg .greyscale-svg #kannada-abvs}

Above-base form composition
:::

```{svg-color-toggle-button} kannada-abvs
```


The `blws` feature replaces below-base-consonant glyphs with special
presentation forms. This usually involves replacing multiple
below-base glyphs (substituted earlier with the `blwf` feature) with
ligatures or conjunct forms.

:::{figure-md}
![Below-base form composition](/images/kannada/kannada-blws.svg "Below-base form composition"){.shaping-demo .inline-svg .greyscale-svg #kannada-blws}

Below-base form composition
:::

```{svg-color-toggle-button} kannada-blws
```


The `psts` feature replaces post-base-consonant glyphs with special
presentation forms. This usually includes replacing right-side
dependent vowels (matras) with stylistic variants or replacing
post-base-consonant/matra pairs with contextual ligatures. 

:::{figure-md}
![Post-base form composition](/images/kannada/kannada-psts.svg "Post-base form composition"){.shaping-demo .inline-svg .greyscale-svg #kannada-psts}

Post-base form composition
:::

```{svg-color-toggle-button} kannada-psts
```


The `haln` feature replaces syllable-final <samp>"_Consonant_,Halant"</samp> pairs with
special presentation forms. This can include stylistic variants of the
consonant where placing the <samp>"Halant"</samp> mark on its own is
typographically problematic.

:::{figure-md}
![Halant form composition](/images/kannada/kannada-haln.svg "Halant form composition"){.shaping-demo .inline-svg .greyscale-svg #kannada-haln}

Halant form composition
:::

```{svg-color-toggle-button} kannada-haln
```

> Note: The `calt` feature, which allows for generalized application
> of contextual alternate substitutions, is usually applied at this
> point. However, `calt` is not mandatory for correct Kannada shaping
> and may be disabled in the application by user preference.

### Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied.

As with the preceding stage, the order in which these features are
applied is not canonical; they should be applied in the order in which
they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

        dist
        abvm
        blwm

> Note: The `kern` feature is usually applied at this stage, if it is
> present in the font. However, `kern` (like `calt`, above) is not
> mandatory for shaping Kannada text and may be disabled by user preference.

The `dist` feature adjusts the horizontal positioning of
glyphs. Unlike `kern`, adjustments made with `dist` do not require the
application or the user to enable any software _kerning_ features, if
such features are optional. 

:::{figure-md}
![dist feature application](/images/kannada/kannada-dist.svg "dist feature application"){.shaping-demo .inline-svg .greyscale-svg #kannada-dist}

Application of the `dist` feature
:::

```{svg-color-toggle-button} kannada-dist
```

The `abvm` feature positions above-base marks for attachment to base
characters. In Kannada, this includes above-base dependent vowels (matras),
diacritical marks, and Vedic signs. 

The `blwm` feature positions below-base marks for attachment to base
characters. In Kannada, this includes below-base dependent vowels
(matras) as well as below-base diacritical marks.

:::{figure-md}
![Below-base mark positioning](/images/kannada/kannada-blwm.svg "Below-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #kannada-blwm}

Below-base mark positioning
:::

```{svg-color-toggle-button} kannada-blwm
```


## The `<knda>` shaping model ##

The older Kannada script tag, `<knda>`, has been deprecated. However,
shaping engines may still encounter fonts that were built to work with
`<knda>` and some users may still have documents that were written to
take advantage of `<knda>` shaping.

### Distinctions from `<knd2>` ###

The most significant distinction between the shaping models is that the
sequence of <samp>"Halant"</samp> and consonant glyphs (used to trigger shaping
features) was altered when migrating from `<knda>` to
`<knd2>`.

Specifically, shaping engines were expected to reorder post-base
<samp>"Halant,_Consonant_"</samp> sequences to <samp>"_Consonant_,Halant"</samp>.

As a result, a font's <abbr title="Glyph Substitution table">GSUB</abbr> substitutions would be written to match
<samp>"_Consonant_,Halant"</samp> sequences in all pre-base and post-base positions.


The `<knda>` syllable

	Pre-baseC Halant BaseC Halant Post-baseC

would be reordered to

	Pre-baseC Halant BaseC Post-baseC Halant

before features are applied.

In `<knd2>` text, as described above in this document, there is no
such reordering. The correct sequence to match for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions is
<samp>"_Consonant_,Halant"</samp> for pre-base consonants, but <samp>"Halant,_Consonant_"</samp>
for post-base consonants.

> Note: Uniscribe is known to make an exception to this reordering
> operation for `<knda>` syllables that end in a <samp>"Halant"</samp>
> codepoint. For example:
>
>     BaseC Halant Post-baseC Halant
>
> is _not_ reordered to <samp>"BaseC Post-baseC Halant Halant"</samp>. Further
> details are provided in the [Uniscribe compatibility](notes/uniscribe-bug-compatibility.md#kannada-final-double-halants) 
> document. 

In addition, for some scripts, left-side dependent vowel marks
(matras) were not repositioned during the final reordering
stage. For `<knda>` text, the left-side matra was always positioned
at the beginning of the syllable.

Finally, in `<knda>` text, the sequence <samp>"Ra,Halant,ZWJ,_consonant_"</samp>
was treated as equivalent to the sequence
<samp>"Ra,ZWJ,Halant,_consonant_"</samp>. The current version of the Unicode
standard states that <samp>"Ra,ZWJ,Halant,_consonant_"</samp> is the correct
sequence, which is meant to trigger the full form of <samp>"Ra"</samp> followed by
the subjoined form of <samp>"_consonant_"</samp>. 

However, Unicode 4.0 specified <samp>"Ra,Halant,ZWJ,_consonant_"</samp>
instead, which was inconsistent with the needs of other Indic
scripts. Even though this sequence was changed with the release of
Unicode 5.0, legacy documents and systems might still be encountered
that use the Unicode 4.0 sequence.

Consequently, shaping engines that encounter a
<samp>"Ra,Halant,ZWJ,_consonant_"</samp> sequence in `<knda>` text should reorder
the sequence to <samp>"Ra,ZWJ,Halant,_consonant_"</samp> or otherwise produce the
same behavior as <samp>"Ra,ZWJ,Halant,_consonant_"</samp>.


### Advice for handling fonts with `<knda>` features only ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences in order to apply <abbr title="Glyph Substitution table">GSUB</abbr> substitutions when it is known that
the font in use supports only the `<knda>` shaping model.

### Advice for handling text runs composed in `<knda>` format ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions or to reorder them to
<samp>"Halant,_Consonant_"</samp> when processing text runs that are tagged with
the `<knda>` script tag and it is known that the font in use supports
only the `<knd2>` shaping model.

Shaping engines may also choose to position left-side matras according
to the `<knda>` ordering scheme; however, doing so might interfere
with matching <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features.


================================================
FILE: opentype-shaping-khmer.md
================================================
```{include} /_global.md
```

# Khmer shaping in OpenType #

This document details the shaping procedure needed to display text
runs in the Khmer script.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes and subclasses](#shaping-classes-and-subclasses)
      - [Khmer character tables](#khmer-character-tables)
  - [The `<khmr>` shaping model](#the-khmr-shaping-model)
      - [Stage 1: Identifying syllables and other sequences](#stage-1-identifying-syllables-and-other-sequences)
      - [Stage 2: Initial reordering](#stage-2-initial-reordering)
      - [Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr>](#stage-3-applying-the-basic-substitution-features-from-gsub)
      - [Stage 4: Applying all remaining substitution features from <abbr>GSUB</abbr>](#stage-4-applying-all-remaining-substitution-features-from-gsub)
      - [Stage 5: Applying remaining positioning features from <abbr>GPOS</abbr>](#stage-5-applying-remaining-positioning-features-from-gpos)


## General information ##

The Khmer or Cambodian script is a descendant of the Brahmi script,
and follows many of the same general patterns found in [Indic
scripts](opentype-shaping-indic-general.md). However, Khmer
incorporates enough distinctions of its own that it is generally not
advisable to attempt supporting it in a general-purpose Indic shaping
engine. 

The Khmer script is used to write multiple languages, most commonly
Khmer, Tampuan, Krung, Cham, and Pali. In addition,
Sanskrit may be written in Khmer, but the Khmer script is not used
for Vedic texts; therefore, Khmer text runs are not expected to
include any glyphs from the Vedic Extensions block of Unicode. 

The Khmer script tag defined in OpenType is `<khmr>`.


## Terminology ##

OpenType shaping uses a standard set of terms for Brahmi-derived and
Indic scripts.  The terms used colloquially in any particular language
may vary, however, potentially causing confusion.

**Matra** is the standard term for a dependent vowel sign. Syllables
in Khmer script can include sequences of multiple vowels and,
therefore, multiple matras.

**Halant** and **Virama** are both standard terms for the
"vowel-killer" mark. Unicode documents use the term "virama" most
frequently, while OpenType documents use the term "halant" most
frequently.

The Khmer block does include a version of the "halant" mark, "Viriam"
(`U+17D1`). Its usage in Khmer text, however, differs significantly
from the usage of "halant" in other Brahmi-derived and Indic
orthographies.

**Chandrabindu** (or simply **Bindu**) is the standard term for the
diacritical mark indicating that the preceding vowel should be
nasalized. In Khmer, the chandrabindu mark is known as the "nikahit".

The term **base consonant** is also critical to Khmer shaping. The
base consonant of a syllable is the consonant that carries the
syllable's vowel sound, either the inherent vowel (for an unmarked
base consonant) or a dependent vowel (with the addition of a matra). 

Each consonant in Khmer bears one of two inherent vowels. The
two sets of letters that correspond to these inherent vowels are
referred to as **registers**. In Khmer text, a **register shifter**
mark can be used to replace the letter's inherent vowel with the
inherent vowel of the other register.

Some consonants in one register have a corresponding consonant in the
other register; for these consonant pairs, a register shifter is not
employed. 

A syllable's base consonant is generally rendered in its full form
(although it may form ligatures), while other consonants in the
syllable frequently take on secondary forms. The **below-base**,
subscript forms of letters are the most frequently seen secondary 
forms.  However, some secondary forms are **post-base**, and the
secondary form of "Ro" is a **pre-base**-reordering form.

These secondary letter forms are known in Khmer as **coeng**
forms. Most coengs are consonants, although, in certain cases,
independent vowels may take on coeng forms. 

The Khmer block of Unicode does not encode the coeng forms of
letters as separate codepoints. Instead, the "Sign Coeng" (`U+17D2`)
codepoint is a control character used to indicate that the following
letter should be rendered in its coeng form. The "Sign Coeng" has no
visual representation of its own. 

> Note: Despite the potentially confusing name in the Unicode
> standard, "Sign Coeng" (`U+17D2`) itself should _not_ be referred to
> as a **coeng**. The term "coeng" refers to the form of the letter
> that follows the "Sign Coeng" control character.

:::{figure-md}
![Coeng form of Kha](images/khmer/khmer-coeng-kha.svg "Coeng form of Kha"){.shaping-demo .inline-svg .greyscale-svg #khmer-coeng-kha}

Coeng form of Kha
:::

```{svg-color-toggle-button} khmer-coeng-kha
```


Although coengs are typically attached to the base consonant of a
syllable, in certain circumstances coengs may also be attached to an
independent vowel. 

Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.


## Glyph classification ##

Shaping Khmer text depends on the shaping engine correctly
classifying each glyph in the run. As with most other scripts, the
classifications must distinguish between consonants, vowels
(independent and dependent), numerals, punctuation, and various types
of diacritical mark. 

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not sufficient to fully capture the
expected shaping behavior (such as glyph reordering). Therefore,
Khmer glyphs must additionally be classified by how they are treated
when shaping a run of text.

### Shaping classes and subclasses ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Khmer script. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Several of the diacritic and syllable-modifying marks behave according
to their own rules and, thus, have a special class. These include
`NUKTA` and `VISARGA`. Some less-common marks behave according to
rules that are similar to these common marks, and are therefore
classified with the corresponding common mark. 

> Note: In Khmer, the `NUKTA` class is used for the chandrabindu mark,
> <samp>"Nikahit"</samp> (`U+17C6`). This more correctly reflects the shaping
> behavior of the nikahit mark than does the `BINDU` class used in
> other scripts.

<samp>"Viriam"</samp> (`U+17D1`), Khmer's "halant"-like codepoint, is classified as
`PURE_KILLER` rather than the more common `VIRAMA`. This is to
indicate that the <samp>"Viriam"</samp> suppresses the inherent vowel of a
consonant but is not used between consonants to trigger the formation
of a subjoined form.

<samp>"Sign Coeng"</samp>, the coeng-form generator, is classified as
`INVISIBLE_STACKER`. This is to indicate that the <samp>"Sign Coeng"</samp>
codepoint itself is never rendered as a visible glyph. 

<samp>"Toandakhiat"</samp> (`U+17CD`) is classified as `CONSONANT_KILLER`. This
mark indicates that the previous consonant is not pronounced. Note
that <samp>"Toandakhiat"</samp> is a diacritic mark, and that its class,
`CONSONANT_KILLER`, is not a subclass of consonant.

Letters generally fall into the classes `CONSONANT`,
`VOWEL_INDEPENDENT`, and `VOWEL_DEPENDENT`. These classes help the
shaping engine parse and identify key positions in a syllable. For
example, Unicode categorizes dependent vowels as `Mark [Mn]`, but the
shaping engine must be able to distinguish between dependent vowels
and diacritical marks (which are also categorized as `Mark [Mn]`).

Khmer uses one subclass of consonant, `CONSONANT_POST_REPHA`. This
subclass is used only for <samp>"Robat"</samp>, the above-base form of <samp>"Ro"</samp>. The
<samp>"Robat"</samp> is similar to the <samp>"Reph"</samp> found in many Indic scripts but,
unlike <samp>"Reph"</samp>, <samp>"Robat"</samp> is encoded as a separate codepoint; therefore, it is
not formed by a special sequence of control characters.

:::{figure-md}
![Robat](images/khmer/khmer-robat.svg "Robat"){.shaping-demo .inline-svg .greyscale-svg #khmer-robat}

Robat
:::

```{svg-color-toggle-button} khmer-robat
```


<samp>"Robat"</samp> is a consonant, but it is classified as a combining mark in
Unicode. For shaping purposes, <samp>"Robat"</samp> behaves like the <samp>"Nukta"</samp> mark
found in many Indic scripts.

The Khmer glottal-stop consonant "Qa" (`U+17A2`) carries an inherent
vowel and is also capable of accepting dependent vowels (matras). It
is sometimes used in place of an independent vowel. For shaping
purposes, however, this usage of "Qa" does not demand any special
treatment.

Other characters, such as symbols and punctuation, need no special
attention from the shaping engine, so they are not assigned a shaping
class.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType features that
might affect how the respective glyphs are drawn, such as `tnum`,
which specifies the usage of tabular-width numerals, and `sups`, which
replaces the default glyphs with superscript variants.

Marks and dependent vowels are further labeled with a mark-placement
subclass, which indicates where the glyph will be placed with respect
to the base character to which it is attached. The actual position of
the glyphs is determined by the lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr>
table; however, the shaping rules for Indic scripts require that the
shaping engine be able to identify marks by their general
position. 

For example, left-side dependent vowels (matras), classified
with `LEFT_POSITION`, must frequently be reordered. Therefore, the
`LEFT_POSITION` subclass of the character must be tracked throughout
the shaping process.

There are four basic _mark-placement subclasses_ for dependent vowels
(matras). Each corresponds to the visual position of the matra with
respect to the base consonant to which it is attached:

  - `LEFT_POSITION` matras are positioned to the left of the base consonant.
  - `RIGHT_POSITION` matras are positioned to the right of the base consonant.
  - `TOP_POSITION` matras are positioned above the base consonant.
  - `BOTTOM_POSITION` matras are positioned below the base consonant.
  
These positions may also be referred to elsewhere in shaping documents as:

  - _Pre-base_ matras
  - _Post-base_ matras
  - _Above-base_ matras
  - _Below-base_ matras
  
respectively. The `LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

Multi-part dependent vowels (matras) may be designated with compound
mark-placement subclasses (such as `TOP_AND_LEFT_POSITION`) that
denote all of the mark-placement positions occupied.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there are some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 


### Khmer character tables ###

The Khmer block in Unicode includes all of the codepoints necessary to
write Khmer language text. The Khmer Symbols block contains
miscellaneous symbols used for lunar-date calendars. The Khmer Symbols
codepoints do not evoke any special behavior from the shaping engine.

Separate character tables are provided for the Khmer and Khmer Symbols
blocks as well as for other miscellaneous characters that 
are used in `<khmr>` text runs:

  - [Khmer character table](character-tables/character-tables-khmer.md#khmer-character-table)
  - [Khmer Symbols character table](character-tables/character-tables-khmer.md#khmer-symbols-character-table)
  - [Miscellaneous character table](character-tables/character-tables-khmer.md#miscellaneous-character-table)

The tables list each codepoint along with its Unicode general
category, its shaping class, and its mark-placement subclass. The
codepoint's Unicode name and an example glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1780`   | Letter           | CONSONANT         | _null_                     | &#x1780; Ka                  |
| | | | | |
|`U+17C6`   | Mark [Mn]        | NUKTA             | TOP_POSITION               | &#x17C6; Nikahit             |
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine.

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the tables use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.

Other important characters that may be encountered when shaping runs
of Khmer text include the dotted-circle placeholder (`U+25CC`), 
the no-break space (`U+00A0`), and the zero-width space (`U+200B`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

<!--- The zero-width joiner is primarily used to prevent the formation of a
subjoining form from a <samp>"_Consonant_,Halant,_Consonant_"</samp> sequence. The sequence
<samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> blocks the substitution of a
subjoined form for the second consonant. --->

<!---
A secondary usage of the zero-width joiner is to prevent the formation of
<samp>"Reph"</samp>. An initial <samp>"Ra,Halant,ZWJ"</samp> sequence should not produce a <samp>"Reph"</samp>,
where an initial <samp>"Ra,Halant"</samp> sequence without the zero-width joiner
otherwise would.
--->

The no-break space (<abbr>NBSP</abbr>) is primarily used to display
those codepoints that are defined as non-spacing (marks, dependent
vowels (matras), below-base consonant forms, and post-base consonant
forms) in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match <samp>"NBSP,ZWJ,Sign_Coeng,_Consonant_"</samp>, <samp>"NBSP,_mark_"</samp>, or
<samp>"NBSP,_matra_"</samp>.

The zero-width space may be used between words — even though no visual
word spacing results — in order to indicate word breaks within a text
that can be used by line-breaking algorithms in a higher-level
typesetting environment.

Several codepoints in the Khmer block are deprecated and their usage
in new documents is officially discouraged. The deprecated codepoints
are:

  - "Qaq" (`U+17A3`)
  - "Qaa" (`U+17A4`)
  
Usage of three other codepoints is also discouraged, although the
codepoints have not been deprecated. These codepoints are:

  - "Inherent Aq" (`U+17B4`)
  - "Inherent Aaa" (`U+17B5`)
  - "Sign Beyyal" (`U+17D8`)

Although usage of these codepoints in text is discouraged, shaping
engines encountering them in a text run should handle the situation
gracefully.


## The `<khmr>` shaping model ##

Processing a run of `<khmr>` text involves five top-level stages:

1. Identifying syllables and other sequences
2. Initial reordering
3. Applying the basic substitution features from <abbr>GSUB</abbr>
4. Applying all remaining substitution features from <abbr>GSUB</abbr>
5. Applying all remaining positioning features from <abbr>GPOS</abbr>


As with other Brahmi-derived and Indic scripts, the initial reordering
stage involves applying a set of several script-specific rules. The
basic substitution features must be applied to the run in a specific
order. The remaining substitution features in stage four, however, do
not have a mandatory order.

Khmer exhibits many of the same shaping patterns found in Indic
scripts, but it differs in a few critical characteristics. With regard
to these common variations, Khmer's specific shaping 
characteristics include:


  - The first consonant of a syllable is always the base consonant.

> Note: For comparison with the General Indic shaping model, this
> characteristic would correspond to `BASE_POS_FIRST`.
  
  - The <samp>"Robat"</samp> form, which is analogous to <samp>"Reph"</samp> or <samp>"Repha"</samp> in Indic
    scripts, is separately encoded as a non-spacing mark
    codepoint. The <samp>"Robat"</samp> form does not require reordering.

> Note: For comparison with the General Indic shaping model, the
> Robat-encoding characteristic would correspond to `REPH_MODE_VISUAL_REPHA`,
> and the reordering characteristic would be _null_ or some other
> designation indicating that the <samp>"Robat"</samp> is not reordered. 
<!---Because 
> <samp>"Robat"</samp> is typically a syllable-initial feature, shaping engines may
> also choose to --->
  
  - The below-base forms feature is applied to consonants
    after the base consonant. 

> Note: For comparison with the General Indic shaping model, this
> characteristic would correspond to `BLWF_MODE_POST_ONLY`.

  - The ordering position for left-side matras, as with Indic scripts,
    is `POS_PREBASE_MATRA`.

  - The ordering positions for right-side, below-base, and above-base matras are the
    same. All are reordered to immediately after the last post-base consonant.
   	
> Note: For comparison with the General Indic shaping model, this
> characteristic would correspond to `MATRA_POS_TOP`,
> `MATRA_POS_BOTTOM`, and `MATRA_POS_RIGHT` taking the ordering position 
> `POS_AFTER_POST`.


### Stage 1: Identifying syllables and other sequences ###

A syllable in Khmer consists of a valid orthographic sequence
that may be followed by a "tail" of modifier signs. 

> Note: The Khmer Unicode block enumerates eight modifier signs,
> "Nikahit" (`U+17C6`), "Reahmuk" (`U+17C7`), "Bantoc" (`U+17CB`),
> "Kakabat" (`U+17CE`),  "Ahsda" (`U+17CF`), "Samyok Sannya"
> (`U+17D0`), "Bathamasat" (`U+17D3`), and "Atthacan" (`U+17DD`). 

Because texts written in Khmer script do not generally employ
inter-word spaces, however, shaping engines must rely on
syllable-identification algorithms to recognize word-boundary
patterns — distinguishing numeric sequences, symbols, punctuation, and other
miscellaneous script characters from syllables within words.

Valid syllables may begin with either a consonant or an independent vowel. 

If the syllable begins with a consonant, then the consonant that
provides the vowel sound is referred to as the "base" consonant. If
the syllable begins with an independent vowel, that vowel is the
syllable's only vowel sound and, by definition, there is no "base"
consonant. 

> Note: A consonant that is not accompanied by a dependent vowel (matra) sign
> carries the consonant's inherent vowel sound. This vowel sound can be changed
> by a dependent vowel (matra) sign or by a register shifter following the consonant.

For a syllable beginning with a consonant, the base consonant is the
first consonant of the syllable.

Unlike Indic scripts, where the vowel sound designates the end of the
syllable, Khmer syllables can end with final consonants that occur
after a dependent vowel (matra).

All post-base consonants in a valid syllable will be preceded by <samp>"Sign Coeng"</samp>
marks. This includes final consonants.

	BaseC Sign-Coeng Post-baseC Matra Sign-Coeng FinalC
	
In some Khmer words, an independent vowel can occur in a subjoined
position like a post-base consonant. In such instances, the
independent vowel will be preceded by <samp>"Sign Coeng"</samp>.

	BaseC Sign-Coeng IndependentVowel

The algorithms for identifying syllables and for correctly identifying
the base consonant include tests to recognize these sequences.


As with other Brahmi-derived and Indic scripts, the consonant <samp>"Ro"</samp> receives
special treatment. 

  - A post-base <samp>"Ro"</samp> must be reordered to a visually pre-base
    position. This move is performed during the initial reordering
    stage.
  - <samp>"Robat"</samp>, the above-base variant of <samp>"Ro"</samp>, is encoded as a combining
    mark rather than as a full consonant. <samp>"Robat"</samp> does not, however,
    require reordering by the shaping engine.

In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.

> Note: Foreign loanwords, when written in the Khmer script, may
> not adhere to the syllable-formation rules described above. 


Syllables should be identified by examining the run and matching
glyphs, based on their categorization, using regular expressions. 

The following regular expressions can be used to match Khmer-script
syllables. 

The regular expressions utilize the shaping classes from the tables
above. For the purpose of syllable identification, more general
classes can be used, as defined in the following table. This
simplifies the resulting expressions. 

```markdown
_ra_		= The consonant "Ro" 
_consonant_	= `CONSONANT` - _ra_
_vowel_		= `VOWEL_INDEPENDENT`
_nukta_	  	= `NUKTA` | `CONSONANT_POST_REPHA`
_zwj_		= `JOINER`
_zwnj_		= `NON_JOINER`
_matra_		= `VOWEL_DEPENDENT` | `PURE_KILLER` | `CONSONANT_KILLER`
_syllablemodifier_	= `SYLLABLE_MODIFIER` | `BINDU` | `VISARGA`
_placeholder_	= `PLACEHOLDER` | `CONSONANT_PLACEHOLDER`
_dottedcircle_	= `DOTTED_CIRCLE`
_registershifter_ = `REGISTER_SHIFTER`
_coeng_		= `INVISIBLE_STACKER`
_symbol_	= `SYMBOL` | `AVAGRAHA`
```

> Note: The `CONSONANT_POST_REPHA` shaping class is merged with the
> `NUKTA` shaping class to reflect the correct orthographic behavior
> of <samp>"Robat"</samp>.

These identification classes form the basis of the following regular
expression elements:

```markdown
C	= _consonant_ | _ra_ | _vowel_
N	= (_zwnj_? _registershifter_)? (_nukta_ _nukta_?)?
Z	= _zwj_ | _zwnj_
CN	= C N?
MATRA_GROUP	= Z? _matra_ N?
SYLLABLE_TAIL	= (_syllablemodifier_ _syllablemodifier_?)?
PARTIAL_CLUSTER	= N? (_coeng_ CN)* MATRA_GROUP* (_coeng_ CN)? SYLLABLE_TAIL
```

Using the above elements, the following regular expressions define the
possible syllable types:

A valid syllable will match the expression:
```markdown
(C | _placeholder_ | _dottedcircle_) PARTIAL_CLUSTER
```

The expressions above use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


A sequence that does not match any of these expressions should be
regarded as broken. The shaping engine may make a best-effort attempt
to shape the broken sequence, but making guarantees about the
correctness or appearance of the final result is out of scope for this
document.

After the syllables have been identified, each of the subsequent 
shaping stages occurs on a per-syllable basis.


### Stage 2: Initial reordering ###

The initial reordering stage is used to relocate glyphs from the
phonetic order in which they occur in a run of text to the
orthographic order in which they are presented visually.

> Note: Primarily, this means moving dependent-vowel (matra) glyphs
> and pre-base-reordering <samp>"Ro"</samp>. 
>
> These reordering moves are mandatory. The final-reordering stage
> may make additional moves, depending on the text and on the features
> implemented in the active font.

The syllable should be processed by tagging each glyph with its
intended position based on its ordering category. After all glyphs
have been tagged, the entire syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.

The final sort order of the ordering categories should be:


	POS_RA_TO_BECOME_REPH
	POS_PREBASE_MATRA
	POS_PREBASE_CONSONANT

	POS_BASE_CONSONANT
	POS_AFTER_MAIN

	POS_ABOVEBASE_CONSONANT

	POS_BEFORE_SUBJOINED
	POS_BELOWBASE_CONSONANT
	POS_AFTER_SUBJOINED

	POS_BEFORE_POST
	POS_POSTBASE_CONSONANT
	POS_AFTER_POST

	POS_FINAL_CONSONANT
	POS_SMVD 


This sort order enumerates all of the possible final positions to
which a codepoint might be reordered in Khmer script. 

The position names mimic those used in the General Indic shaping
model, for ease of implementation. However, shaping engines are free
to use any naming scheme they choose. The sort order includes some
categories not utilized in Khmer.

The basic positions (left to right) are <samp>"Reph"</samp> (`POS_RA_TO_BECOME_REPH`), dependent
vowels (matras) and consonants positioned before the base
consonant (`POS_PREBASE_MATRA` and `POS_PREBASE_CONSONANT`), the base
consonant (`POS_BASE_CONSONANT`), above-base consonants
(`POS_ABOVEBASE_CONSONANT`), below-base consonants
(`POS_BELOWBASE_CONSONANT`), consonants positioned after the base consonant
(`POS_POSTBASE_CONSONANT`), syllable-final consonants (`POS_FINAL_CONSONANT`),
and syllable-modifying or Vedic signs (`POS_SMVD`).

In addition, several secondary positions are defined to handle various
reordering rules that deal with relative, rather than absolute,
positioning. `POS_AFTER_MAIN` means that a character must be
positioned immediately after the base consonant. `POS_BEFORE_SUBJOINED`
and `POS_AFTER_SUBJOINED` mean that a character must be positioned
before or after any below-base consonants, respectively. Similarly,
`POS_BEFORE_POST` and `POS_AFTER_POST` mean that a character must be
positioned before or after any post-base consonants, respectively. 

For shaping-engine implementers, the names used for the ordering
categories matter only in that they are unambiguous. 

For a definition of the "base" consonant, refer to stage 2, step 1, which follows.


#### Stage 2, step 1: Base consonant ####

The first step is to determine the base consonant of the syllable, if
there is one, and tag it as `POS_BASE_CONSONANT`.

The base consonant is defined as the consonant in a consonant-based
syllable that carries the syllable's vowel sound. That vowel sound
will either be provided by the script's inherent vowel (in which case
it is not written with a separate character) or the sound will be designated
by the addition of a dependent-vowel (matra) sign.

Vowel-based syllables, standalone sequences, and broken text runs will
not have base consonants.

The algorithm for determining the base consonant is:

  - Starting from the beginning of the syllable, move forwards until a
    `CONSONANT` is found. 
  - The consonant stopped at will be the base consonant.


#### Stage 2, step 2: Matra decomposition ####

Second, any multi-part dependent vowels (matras) must be decomposed
into their left-side and right-side components. Khmer has five
two-part dependent vowels, "Oe" (`U+17BE`), "Ya" (`U+17BF`), "Ie"
(`U+17C0`), "Oo" (`U+17C4`), and "Au" (`U+17C5`).

Each of these dependent vowels decomposes into a left-side component
identical to the single-part dependent vowel "E" (`U+17C1`) plus a
right-side component.

Unlike many other scripts, the decompositions of multi-part dependent
vowels in Khmer are not defined as canonical in Unicode. Some of the
right-side components that would result from these decompositions do not
correspond to assigned Unicode codepoints.

Instead, fonts often substitute the default glyph with a
right-side-component glyph using <abbr title="Glyph Substitution table">GSUB</abbr> substitutions. The decomposition
step performed here allows the left-side component to be correctly
reordered by the shaping engine.

> "Oe" (`U+17BE`) decomposes to "`U+17C1`,`U+17BE`"
>
> "Ya" (`U+17BF`) decomposes to "`U+17C1`,`U+17BF`"
>
> "Ie" (`U+17C0`) decomposes to "`U+17C1`,`U+17C0`"
>
> "Oo" (`U+17C4`) decomposes to "`U+17C1`,`U+17C4`"
>
> "Au" (`U+17C5`) decomposes to "`U+17C1`,`U+17C5`"

Two of the multi-part dependent vowels, "Oe" (`U+17BE`) and "Oo"
(`U+17C4`), can be decomposed into existing Unicode codepoints. If
desired, the corresponding decompositions are: 

> "Oe" (`U+17BE`) decomposes to "`U+17C1`,`U+17B8`"
>
> "Oo" (`U+17C4`) decomposes to "`U+17C1`,`U+17B6`"

However, shaping engines should take note of the fact that these
decompositions are non-canonical and therefore, if the active font's
design employs non-standard stylistic choices, the results may not
appear as expected.

:::{figure-md}
![Multi-part matra decomposition](images/khmer/khmer-matra-decomposition.svg "Multi-part matra decomposition"){.shaping-demo .inline-svg .greyscale-svg #khmer-matra-decomposition}

Multi-part matra decomposition
:::

```{svg-color-toggle-button} khmer-matra-decomposition
```


Because this decomposition is a character-level operation, the shaping
engine may choose to perform it earlier, such as during an initial
Unicode-normalization stage. However, all such decompositions must be
completed before the shaping engine begins step three, below.


#### Stage 2, step 3: Tag matras ####

Third, all left-side dependent-vowel (matra) signs must be tagged to be
moved to the beginning of the syllable, with `POS_PREBASE_MATRA`.

All right-side, above-base, and below-base dependent-vowel (matra)
signs are tagged `POS_AFTER_POST`.

For simplicity, shaping engines may choose to tag matras
in an earlier text-processing step, using the information in the
_Mark-placement subclass_ column of the character tables. It is
critical at this step, however, that all matras be correctly tagged
before proceeding to the next step. 


<!--- #### Stage 2, step 4: Adjacent marks #### --->
<!--- This does not seem to happen in Khmer. --->
<!--- Commenting out & renumbering. --->


#### Stage 2, step 4: Pre-base-reordering consonants ####

Fourth, all pre-base-reordering consonants must be tagged with
`POS_PREBASE_CONSONANT`. 

Khmer has one pre-base-reordering consonant: <samp>"Ro"</samp>.

:::{figure-md}
![Pre-base-reordering Ro](images/khmer/khmer-pref.svg "Pre-base-reordering Ro"){.shaping-demo .inline-svg .greyscale-svg #khmer-pref}

Pre-base-reordering Ro
:::

```{svg-color-toggle-button} khmer-pref
```


#### Stage 2, step 5: Tag remaining consonants ####

Fifth, the remaining consonants and independent vowels should be
tagged with the appropriate positions.

  - All `VOWEL_INDEPENDENT`s and all `CONSONANT`s other than <samp>"Ro"</samp> occurring
    after the base consonant (found in step one) must be tagged as 
    `POS_BELOWBASE_CONSONANT`. 
  - Any `CONSONANT`s or `VOWEL_INDEPENDENT`s in the syllable occurring
    after a dependent vowel (matra) must be tagged as `POS_FINAL_CONSONANT`.

In a valid syllable, such post-base consonants (of class `CONSONANT`)
and independent vowels (of class `VOWEL_INDEPENDENT`) will be preceded by a
<samp>"Sign_Coeng"</samp> glyph. 

> Note: The consonant <samp>"Robat"</samp>, of class `CONSONANT_POST_REPHA`, is not
> included in the classes checked here and must not be tagged in this
> step. <samp>"Robat"</samp> should not appear in a post-base position in a valid
> syllable.


#### Stage 2, step 6: Mark tagging ####

<!--- not sure this is done!!! --->

Sixth, all marks must be tagged. 

Several Khmer marks that are categorized in Unicode as syllable
modifiers or that modify consonants are allowed to occur mid-syllable
in Khmer words. Therefore, they are not tagged for the `POS_SMVD`
position that is typically reserved for syllable modifiers and Vedic
signs.

:::{table} {{khmer_midsyllable_mark_table_workaround}}

| Codepoint | Sorting Position        | Glyph                  |
|:----------|:------------------------|:-----------------------|
|`U+17CB`   |`POS_ABOVEBASE_CONSONANT`| &#x17CB; Bantoc        |
|`U+17CD`   |`POS_ABOVEBASE_CONSONANT`| &#x17CD; Toandakhiat   |
|`U+17CE`   |`POS_ABOVEBASE_CONSONANT`| &#x17CE; Kakabat       |
|`U+17CF`   |`POS_ABOVEBASE_CONSONANT`| &#x17CF; Ahsda         |
|`U+17D0`   |`POS_ABOVEBASE_CONSONANT`| &#x17D0; Samyok Sannya |
|`U+17D1`   |`POS_ABOVEBASE_CONSONANT`| &#x17D1; Viriam        |
|`U+17D3`   |`POS_ABOVEBASE_CONSONANT`| &#x17D3; Bathamasat    |
|`U+17DD`   |`POS_ABOVEBASE_CONSONANT`| &#x17DD; Atthacan      |
:::


All remaining marks, including <samp>"Sign Coeng"</samp>, must be tagged with the
same positioning tag as the closest non-mark character the mark has
affinity with, so that they move together during the sorting step.

There are two possible cases: those marks before the base consonant
and those marks after the base consonant.

  1. Initially, all remaining marks should be tagged with the same
  positioning tag as the closest preceding consonant.

  2. For each consonant after the base consonant (such as post-base
  consonants, below-base consonants, or final consonants), all
  remaining marks located between that current consonant and any
  previous consonant should be tagged with the same positioning tag as
  the current (later) consonant.
  
In other words, all consonants preceding the base consonant "own" the
marks that follow them, while all consonants after the base consonant
"own" the marks that come before them. When a syllable does not have
any consonants after the base consonant, the base consonant should
"own" all the marks that follow it.

> Note: In this step, joiner and non-joiner characters must also be
> tagged according to the same rules given for marks, even though
> these characters are not categorized as marks in Unicode.
>
> Note: No marks will precede the base consonant in a valid syllable.

With these steps completed, the syllable can be sorted into the final sort order.


### Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

The order in which these substitutions must be performed is fixed:

	locl
	ccmp 
	pref 
	blwf
	abvf
	pstf
	cfar


#### Stage 3, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

<!--- :::{figure-md}
![Local forms substitution](images/khmer/khmer-locl.svg "Local forms substitution"){.shaping-demo .inline-svg .greyscale-svg #khmer-locl}

Local forms substitution
::: --->


#### Stage 3, step 2: ccmp ####

The `ccmp` feature allows a font to substitute mark-and-base sequences
with a pre-composed glyph including the mark and the base, or to
substitute a single glyph into an equivalent decomposed sequence of glyphs. 
 
If present, these composition and decomposition substitutions must be
performed before applying any other <abbr title="Glyph Substitution table">GSUB</abbr> lookups, because
those lookups may be written to match only the `ccmp`-substituted
glyphs. 

> Note: `ccmp` usage is uncommon in Khmer fonts. Nevertheless,
> shaping engines must apply any `ccmp` substitutions if they are
> present in the active font.


#### Stage 3, step 3: pref ####

The `pref` feature replaces pre-base-consonant glyphs with
any special forms. In Khmer, this typically includes the
pre-base-reordering form of <samp>"Ro"</samp>.

:::{figure-md}
![Pre-base form substitution](/images/khmer/khmer-pref-1.svg "Pre-base form substitution"){.shaping-demo .inline-svg .greyscale-svg #khmer-pref-1}

Pre-base form substitution
:::

```{svg-color-toggle-button} khmer-pref-1
```


<!--- be sure to show initial form with Ro BEFORE the base consonant, --->
<!--- since initial reordering has been done already. --->


#### Stage 3, step 4: blwf ####

The `blwf` feature replaces below-base-consonant glyphs with any
special forms. In Khmer, this usually means replacing the default
forms of letters with coeng (or subscript) forms.


<!--- Check below!  --->

The below-base forms feature is applied to glyphs occurring after the
base consonant.

:::{figure-md}
![Below-base form substitution](/images/khmer/khmer-blwf.svg "Below-base form substitution"){.shaping-demo .inline-svg .greyscale-svg #khmer-blwf}

Below-base form substitution
:::

```{svg-color-toggle-button} khmer-blwf
```


#### Stage 3, step 5: abvf ####

The `abvf` feature replaces above-base-consonant glyphs with any
special forms. In Khmer, this may include variant forms of above-base
dependent vowels and marks.
<!--- single-sub-lookup 25, 28 --->

:::{figure-md}
![Above-base form substitution](images/khmer/khmer-abvf.svg "Above-base form substitution"){.shaping-demo .inline-svg .greyscale-svg #khmer-abvf}

Above-base form substitution
:::

```{svg-color-toggle-button} khmer-abvf
```


#### Stage 3, step 6: pstf ####

The `pstf` feature replaces post-base-consonant glyphs with any
special forms. In Khmer, this can include coeng forms of certain
consonants that include an ascending "arm" on the right-hand side as
well as variant forms for right-side matras and marks.

:::{figure-md}
![Post-base form substitution](/images/khmer/khmer-pstf.svg "Post-base form substitution"){.shaping-demo .inline-svg .greyscale-svg #khmer-pstf}

Post-base form substitution
:::

```{svg-color-toggle-button} khmer-pstf
```


#### Stage 3, step 7: cfar ####

The `cfar` feature replaces any below-base-consonant or
post-base-consonant glyphs that occur immediately after a <samp>"Sign
Coeng,Ro"</samp> sequence with special presentation forms. This can include
contextual variants of post-base and below-base glyphs designed to
better interact, visually, with the final position of
pre-base-reordering <samp>"Ro"</samp>.

<!--- Try TRYo--->

<!--- ### 4: Final reordering ### --->
<!--- Is there any? --->

### Stage 4: Applying all remaining substitution features from <abbr>GSUB</abbr> ###

In this stage, the remaining substitution features from the <abbr title="Glyph Substitution table">GSUB</abbr> table
are applied. The order in which these features are applied is not
canonical; they should be applied in the order in which they appear in
the <abbr title="Glyph Substitution table">GSUB</abbr> table in the font. 

	pres
	blws
	abvs
	psts
	calt
	clig
	liga


The `pres` feature replaces pre-base-consonant glyphs with special
presentation forms. In Khmer, this can include stylistic variants
of left-side dependent vowels (matras) or of pre-base-reordering <samp>"Ro"</samp>. 

:::{figure-md}
![Pre-base presentation form substitution](/images/khmer/khmer-pres.svg "Pre-base presentation form substitution"){.shaping-demo .inline-svg .greyscale-svg #khmer-pres}

Pre-base presentation form substitution
:::

```{svg-color-toggle-button} khmer-pres
```


The `abvs` feature replaces above-base-consonant glyphs with special
presentation forms. This usually includes contextual variants of
above-base marks or contextually appropriate mark-and-base ligatures.

:::{figure-md}
![Above-base presentation form substitution](/images/khmer/khmer-abvs.svg "Above-base presentation form substitution"){.shaping-demo .inline-svg .greyscale-svg #khmer-abvs}

Above-base presentation form substitution
:::

```{svg-color-toggle-button} khmer-abvs
```


The `blws` feature replaces below-base-consonant glyphs with special
presentation forms. In Khmer, this can include contextual ligatures
involving below-base dependent vowel marks (matras) or subjoined letters.

:::{figure-md}
![Below-base presentation form substitution](/images/khmer/khmer-blws.svg "Below-base presentation form substitution"){.shaping-demo .inline-svg .greyscale-svg #khmer-blws}

Below-base presentation form substitution
:::

```{svg-color-toggle-button} khmer-blws
```


The `psts` feature replaces post-base-consonant glyphs with special
presentation forms. This usually includes stylistic variants of
right-side dependent vowels (matras) or of subjoined letters featuring
right-side ascenders.


:::{figure-md}
![Post-base presentation form substitution](/images/khmer/khmer-psts.svg "Post-base presentation form substitution"){.shaping-demo .inline-svg .greyscale-svg #khmer-psts}

Post-base presentation form substitution
:::

```{svg-color-toggle-button} khmer-psts
```


The `clig` feature substitutes optional ligatures that are on by
default, but which are activated only in certain contexts. 

> Note: In some other scripts, substitutions made by `clig` may be
> disabled by application-level user interfaces. For Khmer, however,
> application of `clig` substitutions is mandatory because these
> substitutions are important for typographic correctness, not merely
> for user preference.

:::{figure-md}
![Contextual ligature substitution](images/khmer/khmer-clig.svg "Contextual ligature substitution"){.shaping-demo .inline-svg .greyscale-svg #khmer-clig}

Contextual ligature substitution
:::

```{svg-color-toggle-button} khmer-clig
```


The `liga` feature substitutes standard, optional ligatures that are on
by default. Substitutions made by `liga` may be disabled by
application-level user interfaces.

:::{figure-md}
![Standard ligature substitution](/images/khmer/khmer-liga.svg "Standard ligature substitution"){.shaping-demo .inline-svg .greyscale-svg #khmer-liga}

Standard ligature substitution
:::

```{svg-color-toggle-button} khmer-liga
```


### Stage 5: Applying remaining positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied. As with the preceding stage, the order in which these
features are applied is not canonical; they should be applied in the
order in which they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

	dist
	kern
	blwm
	abvm
	mkmk

> Note: The `kern` feature is usually applied at this stage, if it is
> present in the font. However, `kern` is not mandatory for shaping
> Khmer text and may be disabled by user preference.
>
> Notably, the Microsoft Uniscribe shaping engine does not apply
> `kern` lookups even if they are present in the font. For more
> information on Uniscribe compatibility, see the
> [Uniscribe-bug-compatibility note](/notes/uniscribe-bug-compatibility.md).


The `dist` feature adjusts the horizontal positioning of
glyphs. Unlike `kern`, adjustments made with `dist` do not require the
application or the user to enable any software _kerning_ features, if
such features are optional. 

:::{figure-md}
![Application of the dist feature](/images/khmer/khmer-dist.svg "Application of the dist feature"){.shaping-demo .inline-svg .greyscale-svg #khmer-dist}

Application of the dist feature
:::

```{svg-color-toggle-button} khmer-dist
```


The `abvm` feature positions above-base glyphs for attachment to base
characters. In Khmer, this includes register shifters and syllable
modifiers, in addition to diacritical marks and above-base dependent
vowels (matras).

:::{figure-md}
![Above-base mark positioning](/images/khmer/khmer-abvm.svg "Above-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #khmer-abvm}

Above-base mark positioning
:::

```{svg-color-toggle-button} khmer-abvm
```


The `blwm` feature positions below-base glyphs for attachment to base
characters. In Khmer, this can include coeng forms of letters as well as
below-base dependent vowels (matras).

:::{figure-md}
![Below-base mark positioning](/images/khmer/khmer-blwm.svg "Below-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #khmer-blwm}

Below-base mark positioning
:::

```{svg-color-toggle-button} khmer-blwm
```


The `mkmk` feature positions marks with respect to preceding marks,
providing proper positioning for sequences of marks that attach to the
same base glyph.


================================================
FILE: opentype-shaping-malayalam.md
================================================
```{include} /_global.md
```

# Malayalam shaping in OpenType #

This document details the shaping procedure needed to display text
runs in the Malayalam script.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes and subclasses](#shaping-classes-and-subclasses)
      - [Malayalam character tables](#malayalam-character-tables)
  - [The `<mlm2>` shaping model](#the-mlm2-shaping-model)
      - [Stage 1: Identifying syllables and other sequences](#stage-1-identifying-syllables-and-other-sequences)
      - [Stage 2: Initial reordering](#stage-2-initial-reordering)
      - [Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr>](#stage-3-applying-the-basic-substitution-features-from-gsub)
      - [Stage 4: Final reordering](#stage-4-final-reordering)
      - [Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr>](#stage-5-applying-all-remaining-substitution-features-from-gsub)
      - [Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr>](#stage-6-applying-remaining-positioning-features-from-gpos)
  - [The `<mlym>` shaping model](#the-mlym-shaping-model)
      - [Distinctions from `<mlm2>`](#distinctions-from-mlm2)
      - [Advice for handling fonts with `<mlym>` features only](#advice-for-handling-fonts-with-mlym-features-only)
      - [Advice for handling text runs composed in `<mlym>` format](#advice-for-handling-text-runs-composed-in-mlym-format)


## General information ##

The Malayalam script belongs to the Indic family, and follows
the same general patterns as the other Indic scripts. More
specifically, it belongs to the South Indic subgroup.

The Malayalam script is used to write multiple languages, most commonly
Malayalam and Paniya. In addition, Sanskrit may be written
in Malayalam, so Malayalam script runs may include glyphs from the Vedic
Extensions block of Unicode. 

There are two extant Malayalam script tags defined in OpenType, `<mlym>`
and `<mlm2>`. The older script tag, `<mlym>`, was deprecated in 2005.
Therefore, new fonts should be engineered to work with the `<mlm2>`
shaping model. However, if a font is encountered that supports only
`<mlym>`, the shaping engine should deal with it gracefully.

## Terminology ##

OpenType shaping uses a standard set of terms for Indic scripts.  The
terms used colloquially in any particular language may vary, however,
potentially causing confusion.

**Matra** is the standard term for a dependent vowel sign. 

**Halant** and **Virama** are both standard terms for the "vowel-killer"
sign. Unicode documents use the term "virama" most frequently, while
OpenType documents use the term "halant" most frequently. In the Malayalam
language, this sign is known as the _chandrakkala_.

**Chandrabindu** (or simply **Bindu**) is the standard term for the diacritical mark
indicating that the preceding vowel should be nasalized. In the Malayalam
language, this mark is known as the _candrabindu_.

The term **base consonant** is also critical to Indic shaping. The
base consonant of a syllable is the consonant that carries the
syllable's vowel sound, either the inherent vowel (for an unmarked
base consonant) or a dependent vowel (with the addition of a matra).

A syllable's base consonant is generally rendered in its full form
(although it may form ligatures), while other consonants in the
syllable frequently take on secondary forms. Different <abbr title="Glyph Substitution table">GSUB</abbr>
substitutions may apply to a script's **pre-base** and **post-base**
consonants. The **Reph** form of the consonant "Ra" is an
example (post-base in traditional orthography and pre-base in
reformed orthography). Some of these substitutions create **above-base**
or **below-base** forms. For instance "La" takes a `below-base` form.

Syllables may also begin with an **independent vowel** instead of a
consonant. In these syllables, the independent vowel is rendered in
full-letter form, not as a matra, and the independent vowel serves as the
syllable base, similar to a base consonant.

Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.

## Glyph classification ##

Shaping Malayalam text depends on the shaping engine correctly
classifying each glyph in the run. As with most other scripts, the
classifications must distinguish between consonants, vowels
(independent and dependent), numerals, punctuation, and various types
of diacritical mark. 

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not sufficient to fully capture the
expected shaping behavior (such as glyph reordering). Therefore,
Malayalam glyphs must additionally be classified by how they are treated
when shaping a run of text.

### Shaping classes and subclasses ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Indic scripts. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Several of the diacritic and syllable-modifying marks behave according
to their own rules and, thus, have a special class. These include
`BINDU`, `VISARGA`, `AVAGRAHA`, `NUKTA`, and `VIRAMA`. Some
less-common marks behave according to rules that are similar to these
common marks, and are therefore classified with the corresponding
common mark. The Vedic Extensions also include a `CANTILLATION`
class for tone marks.

Letters generally fall into the classes `CONSONANT`,
`VOWEL_INDEPENDENT`, and `VOWEL_DEPENDENT`. These classes help the
shaping engine parse and identify key positions in a syllable. For
example, Unicode categorizes dependent vowels as `Mark [Mn]`, but the
shaping engine must be able to distinguish between dependent vowels
and diacritical marks (which are also categorized as `Mark [Mn]`).

Malayalam uses two subclasses of consonant, `CONSONANT_DEAD` and
`CONSONANT_PRE_REPHA`. 

The `CONSONANT_DEAD` subclass is used for the Malayalam _chillu_
variants of certain consonants. It indicates that the characters
should match tests for consonants, such as when [identifying 
syllables](#stage-1-identifying-syllables-and-other-sequences), but that, unlike
standard consonants, they carry no inherent vowel. The lack of an
inherent vowel is important during the [initial
reordering](#stage-2-initial-reordering) stage.

The `CONSONANT_PRE_REPHA` subclass is used only for the "Dot Reph"
(`U+0D4E`), a dead-consonant version of "Reph" (or "Repha"). In modern
Malayalam orthography, "Dot Reph" is uncommon. As with
`CONSONANT_DEAD`, this subclass should match tests for
consonants. Because the "Dot Reph" character is a "Reph", however, it
must be treated as a "Reph" during the initial and final reordering stages.

Other characters, such as symbols and miscellaneous letters (for
example, letter-like symbols that only occur as standalone entities
and do not occur within syllables), need no special attention from the
shaping engine, so they are not assigned a shaping class.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType features that
might affect how the respective glyphs are drawn, such as `tnum`,
which specifies the usage of tabular-width numerals, and `sups`, which
replaces the default glyphs with superscript variants.

Marks and dependent vowels are further labeled with a mark-placement
subclass, which indicates where the glyph will be placed with respect
to the base character to which it is attached. The actual position of
the glyphs is determined by the lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr>
table; however, the shaping rules for Indic scripts require that the
shaping engine be able to identify marks by their general
position. 

For example, left-side dependent vowels (matras), classified
with `LEFT_POSITION`, must frequently be reordered, with the final
position determined by whether or not other letters in the syllable
have formed ligatures or combined into conjunct forms. Therefore, the
`LEFT_POSITION` subclass of the character must be tracked throughout
the shaping process.

There are four basic _mark-placement subclasses_ for dependent vowels
(matras). Each corresponds to the visual position of the matra with
respect to the syllable base to which it is attached:

  - `LEFT_POSITION` matras are positioned to the left of the syllable base.
  - `RIGHT_POSITION` matras are positioned to the right of the syllable base.
  - `TOP_POSITION` matras are positioned above the syllable base.
  - `BOTTOM_POSITION` matras are positioned below the syllable base.
  
These positions may also be referred to elsewhere in shaping documents as:

  - _Pre-base_ matras
  - _Post-base_ matras
  - _Above-base_ matras
  - _Below-base_ matras
  
respectively. The `LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

In addition, dependent-vowel codepoints that are composed of multiple
components will be designated in character tables as having a compound
_mark-placement subclass_, such as `TOP_AND_RIGHT` or `LEFT_AND_RIGHT`. 

However, these multi-part matras are decomposed into separate matra
components during the shaping process. After the decomposition, each
matra component will belong to exactly one of the four basic
_mark-placement subclasses_.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there are some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 

Malayalam includes two special marks that are classified as
`PURE_KILLER`, "Vertical Bar Virama" (`U+0D3B`) and "Circular Virama"
(`U+0D3C`). These marks, like the Virama or "Halant", suppress the
inherent vowel of a consonant. However, unlike "Halant", the use of a
`PURE_KILLER` prevents the formation of ligatures and conjuncts, and
the mark itself is always rendered explicitly. 

Consequently, these marks behave like dependent-vowel marks
(matras). Shaping engines may choose to treat them as matras for simplicity.

### Malayalam character tables ###

Separate character tables are provided for the Malayalam and Vedic
Extensions blocks as well as for other miscellaneous characters that
are used in `<mlm2>` text runs:

  - [Malayalam character table](character-tables/character-tables-malayalam.md#malayalam-character-table)
  - [Vedic Extensions character table](character-tables/character-tables-malayalam.md#vedic-extensions-character-table)
  - [Miscellaneous character table](character-tables/character-tables-malayalam.md#miscellaneous-character-table)

The tables list each codepoint along with its Unicode general
category, its shaping class, and its mark-placement subclass. The
codepoint's Unicode name and an example glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0D01`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0D01; Candrabindu         |
| | | | |
|`U+0D15`   | Letter           | CONSONANT         | _null_                     | &#x0D15; Ka                  |
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine.

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the tables use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of Malayalam text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (such as <samp>"Reph"</samp>)


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a <samp>"_Consonant_,Halant,_Consonant_"</samp> sequence.

  - The sequence <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> blocks the
    formation of a conjunct between the two consonants. 

Note, however, that the <samp>"_Consonant_,Halant"</samp> subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead.

  - The sequence <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> should produce
    the first consonant in its standard form, followed by an explicit
    <samp>"Halant"</samp>. 

A secondary usage of the zero-width joiner is to prevent the formation of
<samp>"Reph"</samp>.

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence should not produce a <samp>"Reph"</samp>,
    even where an initial <samp>"Ra,Halant"</samp> sequence without the zero-width
    joiner would otherwise produce a <samp>"Reph"</samp>.
    
> Note: Malayalam differs from many Indic scripts in that <samp>"Reph"</samp>
> usage is rare in the modern orthography. In word-initial positions, a
> <samp>"Ra,Halant"</samp> sequence is typically replaced by a dead-consonant form,
> <samp>"Chillu R"</samp>.


The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.

For example:

  - A lookup that substitutes an alternate version of a
    dependent-vowel (matra) glyph when it is preceded by <samp>"Ka,Halant,Tta"</samp>
    should still be applied if the dependent-vowel codepoint is preceded
    by <samp>"Ka,Halant,ZWJ,Tta"</samp> in the text run.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match <samp>"NBSP,ZWJ,Halant,_Consonant_"</samp>, <samp>"NBSP,_mark_"</samp>, or <samp>"NBSP,_matra_"</samp>. 

In addition to general punctuation, runs of Malayalam text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block.


## The `<mlm2>` shaping model ##

Processing a run of `<mlm2>` text involves six top-level stages:

1. Identifying syllables and other sequences
2. Initial reordering
3. Applying the basic substitution features from <abbr>GSUB</abbr>
4. Final reordering
5. Applying all remaining substitution features from <abbr>GSUB</abbr>
6. Applying all remaining positioning features from <abbr>GPOS</abbr>


As with other Indic scripts, the initial reordering stage and the
final reordering stage each involve applying a set of several
script-specific rules. The basic substitution features must be applied
to the run in a specific order. The remaining substitution features in
stage five, however, do not have a mandatory order.

Indic scripts follow many of the same shaping patterns, but they
differ in a few critical characteristics that the shaping engine must
track. These include:

  - The position of the base consonant in a syllable.
  
  - The final position of <samp>"Reph"</samp>.
  
  - Whether <samp>"Reph"</samp> must be requested explicitly or if it is formed by
    a specific, implicit sequence.
	
  - Whether the below-base forms feature is applied only to consonants
    before the syllable base, only to consonants after the base
    consonant, or to both.
	
  - The ordering positions for dependent vowels
    (matras). Specifically, right-side, above-base, and below-base
    matras follow different rules in different scripts. 
	All Indic scripts position left-side matras in the same
    manner, in the ordering position `POS_PREBASE_MATRA`. 

With regard to these common variations, Malayalam's specific shaping
characteristics include:

  - `BASE_POS_LAST` = The base consonant of a syllable is the last
     consonant, not counting any special final-consonant forms.

  - `REPH_POS_AFTER_MAIN` = <samp>"Reph"</samp> is ordered after the syllable base.

  - `REPH_MODE_LOGICAL_REPHA` = <samp>"Reph"</samp> is encoded as its own Unicode
     codepoint (<samp>"Repha"</samp>), but it must still be reordered. 

  - `BLWF_MODE_PRE_AND_POST` = The below-base forms feature is applied both to
     pre-base consonants and to post-base consonants.

  - `MATRA_POS_TOP` = _null_  = Unlike most other Indic scripts, Malayalam
     does not use any above-base matras. Therefore, this shaping
     characteristic does not apply.

  - `MATRA_POS_RIGHT` = `POS_AFTER_POST` = Right-side matras are
     ordered after all post-base consonant forms.

  - `MATRA_POS_BOTTOM` = `POS_AFTER_POST` = Below-base matras are
     ordered after all post-base consonant forms.

These characteristics determine how the shaping engine must reorder
certain glyphs, how base consonants are determined, and how <samp>"Reph"</samp>
should be encoded within a run of text.

> Note: Unlike most other Indic scripts, Malayalam does not use
> above-base matras. Therefore `MATRA_POS_TOP` can be set to _null_.

### Stage 1: Identifying syllables and other sequences ###

A syllable in Malayalam consists of a valid orthographic sequence
that may be followed by a "tail" of modifier signs. 

> Note: The Malayalam Unicode block enumerates five modifier signs,
> "Combining Anusvara Above" (`U+0D00`), "Candrabindu" (`U+0D01`),
> "Anusvara" (`U+0D02`), "Visarga" (`U+0D03`), and "Avagraha"
> (`U+0D3D`). In addition, Sanskrit text written in Malayalam may 
> include additional signs from the Vedic Extensions block.

Each syllable contains exactly one vowel sound. Valid syllables may
begin with either a consonant or an independent vowel. 

If the syllable begins with a consonant, then the consonant that
provides the vowel sound is referred to as the "base" consonant. If
the syllable begins with an independent vowel, that independent vowel
is the syllable's only vowel sound and serves as the "base". 

> Note: A consonant that is not accompanied by a dependent vowel (matra) sign
> carries the script's inherent vowel sound. This vowel sound is changed
> by a dependent vowel (matra) sign following the consonant.

From the shaping engine's perspective, the main distinction between a
syllable with a base consonant and a syllable with an
independent-vowel base is that a syllable with an independent-vowel
base is less likely to include additional consonants in special forms
and less likely to include dependent vowel signs
(matras). Therefore, in the common case, vowel-based syllables may
involve less reordering, substitution feature applications, and other
processing than consonant-based syllables.

In some languages and orthographies, vowel-based syllables are
not permitted to include additional consonants or matras, and certain
<abbr title="Glyph Substitution table">GSUB</abbr> substitution features do not occur. However, there are often
known exceptions, and real-world text makes no such guarantees. 

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

Generally speaking, the base consonant is the final consonant of the
syllable and its vowel sound designates the end of the syllable. This
rule is synonymous with the `BASE_POS_LAST` characteristic mentioned
earlier. 

Non-base consonants in a valid syllable will be separated by <samp>"Halant"</samp>
marks. Pre-base consonants will be followed by <samp>"Halant"</samp>, while
post-base consonants will be preceded by <samp>"Halant"</samp>.

	Pre-baseC Halant BaseC Halant Post-baseC
	
The algorithm for correctly identifying the base consonant includes a
test to recognize these sequences and not mis-identify the base
consonant.

All consonants in Malayalam can potentially occur in pre-base
position. The <samp>"Halant"</samp> marks on pre-base consonants indicate that they
carry no vowel. Instead, they affect syllable pronunciation by
combining with the base consonant (e.g., "_thr_" or "_spl_").

Three consonants in Malayalam are allowed to occur in post-base
position: <samp>"Ya"</samp>, <samp>"Va"</samp>, and <samp>"Ra"</samp>. The post-base <samp>"Ra"</samp> is reordered to
before the base consonant or syllable base during the final-reordering stage of the
shaping process. The post-base forms of <samp>"Ya"</samp> and <samp>"Va"</samp>
remain in post-base position.

Malayalam also includes one consonant that can take on a below-base
form, <samp>"La"</samp>.

As with other Indic scripts, the consonant <samp>"Ra"</samp> receives special
treatment. Malayalam differs from many Indic scripts in that <samp>"Reph"</samp>
usage is rare in the modern orthography.

In word-initial positions, a <samp>"Ra,Halant"</samp> sequence is typically
replaced by a dead-consonant form, <samp>"Chillu R"</samp>. 

Malayalam text runs may also include the explicit variant of <samp>"Reph"</samp>,
the <samp>"Dot Reph"</samp> (`U+0D4E`), also known as <samp>"Repha"</samp>. 

> Note: Modern Malayalam orthography prefers using the <samp>"Chillu R"</samp>
> instead of <samp>"Reph"</samp>. Therefore, Malayalam fonts may omit
> implementation of the <samp>"Reph"</samp> substitution entirely.

As is the case with <samp>"Reph"</samp>, <samp>"Repha"</samp> characters must be reordered after the
syllable-identification stage is complete. This is the
`REPH_MODE_LOGICAL_REPHA` shaping characteristic.

> Note: Generally speaking, OpenType fonts will implement support for
> any below-base, post-base, and pre-base-reordering consonant forms
> by including the necessary substitution rules in their `blwf`,
> `pstf`, and `pref` lookups in <abbr title="Glyph Substitution table">GSUB</abbr>.
>
> Consequently, whenever shaping engines need to determine whether or 
> not a given consonant can take on such a special form, the most
> appropriate test is to check if the consonant is included in the
> relevant <abbr title="Glyph Substitution table">GSUB</abbr> lookup. Other implementations are possible, such as
> maintaining static tables of consonants, but checking for <abbr title="Glyph Substitution table">GSUB</abbr>
> support ensures that the expected behavior is implemented in the
> active font, and is therefore the most reliable approach.


In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.

> Note: Foreign loanwords, when written in the Malayalam script, may
> not adhere to the syllable-formation rules described above. In
> particular, it is not uncommon to encounter foreign loanwords that
> contain a word-final suffix of consonants.
>
> Nevertheless, such word-final suffixes will be correctly matched by
> the regular expressions listed below. These loanwords are pronounced
> differently, which raises issues for potential readers, but the
> character sequences do not affect the shaping process.


Syllables should be identified by examining the run and matching
glyphs, based on their categorization, using regular expressions. 

The following general-purpose Indic-shaping regular expressions can be
used to match Malayalam syllables.

The regular expressions utilize the shaping classes from the tables
above. For the purpose of syllable identification, more general
classes can be used, as defined in the following table. This
simplifies the resulting expressions. 

```markdown
_ra_		= The consonant "Ra" 
_consonant_	= ( `CONSONANT` | `CONSONANT_DEAD` ) - _ra_
_vowel_		= `VOWEL_INDEPENDENT`
_nukta_	  	= `NUKTA`
_halant_	= `VIRAMA`
_zwj_		= `JOINER`
_zwnj_		= `NON_JOINER`
_matra_		= `VOWEL_DEPENDENT` | `PURE_KILLER`
_syllablemodifier_	= `SYLLABLE_MODIFIER` | `BINDU` | `VISARGA` | `GEMINATION_MARK`
_vedicsign_	= `CANTILLATION`
_placeholder_	= `PLACEHOLDER` | `CONSONANT_PLACEHOLDER` | `NUMBER`
_dottedcircle_	= `DOTTED_CIRCLE`
_repha_		= `CONSONANT_PRE_REPHA`
_consonantmedial_	= `CONSONANT_MEDIAL`
_symbol_	= `SYMBOL` | `AVAGRAHA`
_consonantwithstacker_	= `CONSONANT_WITH_STACKER`
_other_		= `OTHER` | `MODIFYING_LETTER`
```


> Note: the _ra_ identification class is mutually exclusive with 
> the _consonant_ class. The union of the _consonant_ and _ra_ classes
> is used in the regular expression elements below in order to
> correctly identify <samp>"Ra"</samp> characters that do not trigger <samp>"Reph"</samp> or
> <samp>"Rakaar"</samp> shaping behavior.
>
> Note, also, that the cantillation mark "combining Ra" in the
> Devanagari Extended block does _not_ belong to the _ra_
> identification class, and that the other "combining consonant"
> cantillation marks in the Devanagari Extended block do not belong to
> the _consonant_ identification class.

> Note: The _placeholder_ identification class includes codepoints
> that are often used in place of vowels or consonants when a document
> needs to display a matra, mark, or special form in isolation or
> in another context beyond a standard syllable. Examples of
> _placeholder_ codepoints include hyphens and non-breaking
> spaces. Sequences that utilize this approach should be identified as
> "standalone" syllables.
>
> The _placeholder_ identification class also includes numerals, which
> are commonly used as word substitutes within normal text. Examples
> include ordinals (e.g., "4th").

> Note: The _other_ identification class includes codepoints that
> do not interact with adjacent characters for shaping purposes. Even
> though some of these codepoints (such as `MODIFYING_LETTER`) can
> occur within words, they evoke no behavior from the shaping
> engine and do not factor into the regular expressions that
> follow. Therefore, the shaping engine may choose to ignore them
> during syllable identification; they are listed here for completeness.

These identification classes form the bases of the following regular
expression elements:

```markdown
C	= _consonant_ | _ra_
Z	= _zwj_ | _zwnj_
REPH	= (_ra_ _halant_) | _repha_
CN		= C _zwj_? _nukta_?
FORCED_RAKAR	= _zwj_ _halant_ _zwj_ _ra_
S	= _symbol_ _nukta_?
MATRA_GROUP	= Z{0,3} _matra_ _nukta_? (_halant_ | FORCED_RAKAR)?
SYLLABLE_TAIL	= (Z? _syllablemodifier_ _syllablemodifier_? _zwnj_?)? _vedicsign_{0,3}
HALANT_GROUP	= Z? _halant_ (_zwj_ _nukta_?)?
FINAL_HALANT_GROUP	= HALANT_GROUP | (_halant_ _zwnj_)
MEDIAL_GROUP	= _consonantmedial_?
HALANT_OR_MATRA_GROUP	= (FINAL_HALANT_GROUP | MATRA_GROUP*)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(MATRA_GROUP)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(MATRA_GROUP){0,4}` .


Using the above elements, the following regular expressions define the
possible syllable types:

A consonant-based syllable will match the expression:
```markdown
(_repha_|_consonantwithstacker_)? (CN HALANT_GROUP)* CN MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(CN HALANT_GROUP)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(CN HALANT_GROUP){0,4}` .

A vowel-based syllable will match the expression:
```markdown
REPH? _vowel_ _nukta_? (_zwj_ | (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

A standalone syllable will match the expression:
```markdown
((_repha_|_consonantwithstacker_)? _placeholder_ | REPH? _dottedcircle_) _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

> Note: Although they are labeled as "standalone syllables" here,
> many sequences that match the standalone regular expression above
> are instances where a document needs to display a matra, combining
> mark, or special form in isolation. Such sequences might not have
> any significance with regard to the definition of syllables used in
> the language or orthography of the text.

A symbol-based syllable will match the expression:
```markdown
S SYLLABLE_TAIL
```

A broken syllable will match the expression:
```markdown
REPH? _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .


The primary problem involved in shaping broken syllables is the lack
of a syllable base (either a base consonant or an independent
vowel). Without a syllable base, the shaping engine cannot perform
<abbr title="Glyph Positioning table">GPOS</abbr> positioning and other contextual operations that are required
later in the shaping process.

To make up for this limitation, shaping engines should insert a
dotted-circle placeholder (`U+25CC`) character into the text stream
where the missing syllable base was expected to occur. This
placeholder allows the shaping process to proceed on a best-effort
basis at handling the broken-syllable sequence, but making guarantees
about the orthographic correctness or preferred appearance of the
final result is out of scope for this document.

Shaping engines can perform this dotted-circle insertion at any point
after the broken syllable has been recognized and before <abbr title="Glyph Substitution table">GSUB</abbr> features
are applied. However, the best results will likely be attained by
performing the insertion immediately, before proceeding to
stage 2. This will enable the maximum number of <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features
in the active font to be correctly applied to the text run by ensuring
that all reordering, tagging, and sorting algorithms are executed as
usual.

> Note: In software stacks where other text-handling operations, such
> as Unicode normalization and localization, are performed before the
> text run is passed to the shaping engine, there is a potential for
> the dotted-circle insertion to cause unexpected effects.
>
> For example, if a `ccmp` or `locl` feature substitutes the default
> dotted-circle placeholder glyph with a variant glyph of a different
> size or weight for the (`U+25CC`) codepoint, then any shaping engine
> which relies on another software component to handle that
> functionality must take additional care to ensure consistency.


The expressions above use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


After the syllables have been identified, each of the subsequent 
shaping stages occurs on a per-syllable basis.

### Stage 2: Initial reordering ###

The initial reordering stage is used to relocate glyphs from the
phonetic order in which they occur in a run of text to the
orthographic order in which they are presented visually.

> Note: Primarily, this means moving dependent-vowel (matra) glyphs, 
> <samp>"Repha"</samp> glyphs, and other consonants that take special
> treatment in some circumstances. <samp>"Ra"</samp>, <samp>"Va"</samp>, <samp>"La"</samp>, and <samp>"Ya"</samp> occasionally
> take on special forms, depending on their position in the syllable.
>
> These reordering moves are mandatory. The final-reordering stage
> may make additional moves, depending on the text and on the features
> implemented in the active font.

The syllable should be processed by tagging each glyph with its
intended position based on its ordering category. After all glyphs
have been tagged, the entire syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.

The final sort order of the ordering categories should be:


	POS_RA_TO_BECOME_REPH
	POS_PREBASE_MATRA
	POS_PREBASE_CONSONANT

	POS_SYLLABLE_BASE
	POS_AFTER_MAIN

	POS_ABOVEBASE_CONSONANT

	POS_BEFORE_SUBJOINED
	POS_BELOWBASE_CONSONANT
	POS_AFTER_SUBJOINED

	POS_BEFORE_POST
	POS_POSTBASE_CONSONANT
	POS_AFTER_POST

	POS_FINAL_CONSONANT
	POS_SMVD


This sort order enumerates all of the possible final positions to
which a codepoint might be reordered, across all of the Indic
scripts. It includes some ordering categories not utilized in
Malayalam. 

The basic positions (left to right) are <samp>"Reph"</samp> (`POS_RA_TO_BECOME_REPH`), dependent
vowels (matras) and consonants positioned before the base
consonant or syllable base (`POS_PREBASE_MATRA` and `POS_PREBASE_CONSONANT`), the base
consonant or syllable base (`POS_SYLLABLE_BASE`), above-base consonants
(`POS_ABOVEBASE_CONSONANT`), below-base consonants
(`POS_BELOWBASE_CONSONANT`), consonants positioned after the base consonant or syllable base
(`POS_POSTBASE_CONSONANT`), syllable-final consonants (`POS_FINAL_CONSONANT`),
and syllable-modifying or Vedic signs (`POS_SMVD`).

In addition, several secondary positions are defined to handle various
reordering rules that deal with relative, rather than absolute,
positioning. `POS_AFTER_MAIN` means that a character must be
positioned immediately after the syllable base. `POS_BEFORE_SUBJOINED`
and `POS_AFTER_SUBJOINED` mean that a character must be positioned
before or after any below-base consonants, respectively. Similarly,
`POS_BEFORE_POST` and `POS_AFTER_POST` mean that a character must be
positioned before or after any post-base consonants, respectively. 

For shaping-engine implementers, the names used for the ordering
categories matter only in that they are unambiguous. 

For a definition of the "base" consonant, refer to stage 2, step 1, which
follows.

#### Stage 2, step 1: Base consonant ####

The first step is to determine the base consonant of the syllable, if
there is one, and tag it as `POS_SYLLABLE_BASE`.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base, and it should be tagged
as `POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a standalone sequence or other syllable that begins with a placeholder
or dotted circle, the placeholder or dotted circle will always serve
as the syllable base, and it should be tagged as
`POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a syllable that begins with a consonant, the shaping engine must
determine the base consonant by a script-specific algorithm.

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

The base consonant is defined as the consonant in a consonant-based
syllable that carries the syllable's vowel sound. That vowel sound
will either be provided by the script's inherent vowel (in which case
it is not written with a separate character) or the sound will be designated
by the addition of a dependent-vowel (matra) sign.


<!--- > Because vowel-based syllables will not include consonants and
> because independent vowels do not take on special forms or require
> reordering, many of the steps that follow will involve no
> work for a vowel-based syllable. However, vowel-based syllables must
> still be sorted and their marks handled correctly, and <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr>
> lookups must be applied. These steps of the shaping process follow
> the same rules that are employed for consonant-based syllables.
--->

While performing the base-consonant search, shaping engines may
also encounter special-form consonants, including below-base
consonants and post-base consonants. Each of these special-form
consonants must also be tagged (`POS_BELOWBASE_CONSONANT`,
`POS_POSTBASE_CONSONANT`, respectively). 

Any pre-base-reordering consonant (such as a pre-base-reordering <samp>"Ra"</samp>)
encountered during the base-consonant search must be tagged
`POS_POSTBASE_CONSONANT`. 
 
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.


The algorithm for determining the base consonant is:

  - If the syllable starts with <samp>"Ra,Halant"</samp> and the syllable contains
    more than one consonant, exclude the starting <samp>"Ra"</samp> from the list of
    consonants to be considered. 
  - Starting from the end of the syllable, move backwards until a consonant is found.
      * If the consonant is the first consonant, stop.
      * If the consonant is preceded by the sequence <samp>"Halant,ZWJ"</samp>, stop.
      * If the consonant has a below-base form, tag it as
        `POS_BELOWBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant has a post-base form, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant is a pre-base-reordering <samp>"Ra"</samp>, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If none of the above conditions is true, stop.
  - The consonant stopped at will be the base consonant.

Malayalam includes a pre-base-reordering <samp>"Ra"</samp>.  A <samp>"Halant,Ra"</samp> sequence
after the base consonant or syllable base will be reordered to a pre-base position
during the final-reordering stage.

Malayalam includes two consonants that can take on
post-base form: <samp>"Ya"</samp> and <samp>"Va"</samp>.

:::{figure-md}
![Post-base Ya formation](/images/malayalam/malayalam-pstf-ya.svg "Post-base Ya formation"){.shaping-demo .inline-svg .greyscale-svg #malayalam-pstf-ya}

Post-base Ya formation
:::

```{svg-color-toggle-button} malayalam-pstf-ya
```


:::{figure-md}
![Post-base Va formation](/images/malayalam/malayalam-pstf-va.svg "Post-base Va formation"){.shaping-demo .inline-svg .greyscale-svg #malayalam-pstf-va}

Post-base Va formation
:::

```{svg-color-toggle-button} malayalam-pstf-va
```


Malayalam includes one consonant that can take on a below-base form:

  - <samp>"Halant,La"</samp> (after the base consonant or syllable base) takes on
    a below-base form.

:::{figure-md}
![Below-base La formation](/images/malayalam/malayalam-blwf.svg "Below-base La formation"){.shaping-demo .inline-svg .greyscale-svg #malayalam-blwf}

Below-base La formation
:::

```{svg-color-toggle-button} malayalam-blwf
```


> Note: Because Malayalam employs the `BLWF_MODE_PRE_AND_POST` shaping
> characteristic, consonants with below-base special forms may occur
> before or after the syllable base. 
> 
> During the base-consonant search, only the <samp>"Halant,_consonant_"</samp>
> pattern following the syllable base for these below-base forms will
> be encountered. Stage 2, step 5 below ensures that the <samp>"_consonant_,Halant"</samp>
> pattern preceding the syllable base for these below-base forms will
> also be tagged correctly.


#### Stage 2, step 2: Matra decomposition ####

Second, any two-part dependent vowels (matras) must be decomposed
into their left-side and right-side components. Malayalam has three
two-part dependent vowels, "O" (`U+0D4A`), "Oo" (`U+0D4B`), and "Au"
(`U+0D4C`). Each has a canonical decomposition, so this step is
unambiguous. 

> "O" (`U+0D4A`) decomposes to "`U+0D46`,`U+0D3E`"
>
> "Oo" (`U+0D4B`) decomposes to "`U+0D47`,`U+0D3E`"
>
> "Au" (`U+0D4C`) decomposes to "`U+0D46`,`U+0D57`"

Because this decomposition is a character-level operation, the shaping
engine may choose to perform it earlier, such as during an initial
Unicode-normalization stage. However, all such decompositions must be
completed before the shaping engine begins step three, below.

:::{figure-md}
![Two-part matra decomposition](/images/malayalam/malayalam-matra-decompose.svg "Two-part matra decomposition"){.shaping-demo .inline-svg .greyscale-svg #malayalam-matra-decompose}

Two-part matra decomposition
:::

```{svg-color-toggle-button} malayalam-matra-decompose
```


#### Stage 2, step 3: Tag matras ####

Third, all left-side dependent-vowel (matra) signs, including those that
resulted from the preceding decomposition step, must be tagged to be
moved to the beginning of the syllable, with `POS_PREBASE_MATRA`.

All right-side dependent-vowel (matra) signs are tagged
`POS_AFTER_POST`.

All below-base dependent-vowel (matra) signs are tagged
`POS_AFTER_POST`.

For simplicity, shaping engines may choose to tag single-part matras
in an earlier text-processing step, using the information in the
_Mark-placement subclass_ column of the character tables. It is
critical at this step, however, that all decomposed matras are also
correctly tagged before proceeding to the next step.

#### Stage 2, step 4: Adjacent marks ####

Fourth, any subsequences of marks that include a <samp>"Nukta"</samp> and a
<samp>"Halant"</samp> or Vedic sign must be reordered so that the <samp>"Nukta"</samp> appears
first.

This means that the subsequence <samp>"Halant,Nukta"</samp> is reordered to
<samp>"Nukta,Halant"</samp> and that the subsequence <samp>"_Vedic_sign_,Nukta"</samp> is
reordered to <samp>"Nukta,_Vedic_sign_"</samp>.

For subsequences of affected marks that are longer than two, the
reordering operation must be repeated until the <samp>"Nukta"</samp> is the first
character in the subsequence. No other marks in the subsequence
should be reordered.

This order is canonical in Unicode and is required so that
<samp>"_consonant_,Nukta"</samp> substitution rules from <abbr title="Glyph Substitution table">GSUB</abbr> will be correctly
matched later in the shaping process.

#### Stage 2, step 5: Pre-base consonants ####

Fifth, consonants that occur before the syllable base must be tagged
with `POS_PREBASE_CONSONANT`. Excluding initial <samp>"Ra,Halant"</samp> sequences
that will become <samp>"Reph"</samp>s: 

  - If the consonant has a below-base form, tag it as
          `POS_BELOWBASE_CONSONANT`. 
  - Otherwise, tag it as `POS_PREBASE_CONSONANT`.
  
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.

Malayalam includes one consonant that can take on a below-base form:

  - <samp>"Halant,La"</samp> (after the base consonant or syllable base) takes on
    a below-base form.

> Note: Because Malayalam employs the `BLWF_MODE_PRE_AND_POST` shaping
> characteristic, consonants with below-base special forms may occur
> before or after the syllable base. 
> 
> During the base-consonant search in stage 2, step 1, any instances of the
> <samp>"Halant,_consonant_"</samp>  pattern following the syllable base for these
> below-base forms will be encountered. The tagging in this step
> ensures that the <samp>"_consonant_,Halant"</samp> pattern preceding the syllable
> base for these below-base forms will also be tagged correctly.


#### Stage 2, step 6: Reph ####

Sixth, initial <samp>"Ra,Halant"</samp> sequences that will become <samp>"Reph"</samp>s must be tagged with
`POS_RA_TO_BECOME_REPH`.

> Note: Malayalam differs from many Indic scripts in that <samp>"Reph"</samp>
> usage is rare in the modern orthography. In word-initial positions, a
> <samp>"Ra,Halant"</samp> sequence is typically replaced by a dead-consonant form,
> <samp>"Chillu R"</samp>. 

#### Stage 2, step 7: Final consonants ####

Seventh, all final consonants must be tagged. Consonants that occur
after the syllable base _and_ after a dependent vowel (matra) sign
must be tagged with  `POS_FINAL_CONSONANT`.

> Note: Final consonants occur only in Sinhala and should not be
> expected in `<mlm2>` text runs. This step is included here to
> maintain compatibility across Indic scripts.


#### Stage 2, step 8: Mark tagging ####

Eighth, all marks must be tagged. 

> Note: In this step, joiner and non-joiner characters must also be
> tagged according to the same rules given for marks, even though
> these characters are not categorized as marks in Unicode.

Marks in the `BINDU`, `VISARGA`, `AVAGRAHA`, `CANTILLATION`,
`SYLLABLE_MODIFIER`, `GEMINATION_MARK`, and `SYMBOL` categories should
be tagged with `POS_SMVD`. 

All <samp>"Nukta"</samp>s must be tagged with the same positioning tag as the
preceding consonant, independent vowel, placeholder, or dotted circle.

All remaining marks (not in the `POS_SMVD` category and not <samp>"Nukta"</samp>s)
must be tagged with the same positioning tag as the closest non-mark
character the mark has affinity with, so that they move together 
during the sorting step.

There are two possible cases: those marks before the syllable base
and those marks after the syllable base. In addition, an exception is
made for <samp>"Halant"</samp> marks that follow a left-side (pre-base) matra.

  1. Initially, all remaining marks should be tagged with the same
	 positioning tag as the closest preceding consonant.

  2. For each consonant after the syllable base (such as post-base
	 consonants, below-base consonants, or final consonants), all
	 remaining marks located between that current consonant and any
	 previous consonant should be tagged with the same positioning tag as
	 the current (later) consonant.
  
     In other words, all consonants preceding the syllable base "own" the
	 marks that follow them, while all consonants after the syllable base
	 "own" the marks that come before them. When a syllable does not have
	 any consonants after the syllable base, the syllable base should
	 "own" all the marks that follow it.
  
  3. Finally, <samp>"Halant"</samp> marks that follow a left-side dependent vowel
     (matra) should _not_ be tagged with the left-side matra's
     positioning tag. Instead, the <samp>"Halant"</samp> should be tagged with the
     positioning tag of the non-mark character preceding the left-side
     matra. This prevents the <samp>"Halant"</samp> mark from being moved with the
     left-side matra when the syllable is sorted.


<!--- HarfBuzz also tags everything between a post-base consonant or -->
<!--matra and another post-base consonant as belonging to the latter -->
<!--post-base consonant. --->


#### Stage 2, step 9: Sort syllable ####

With these steps completed, the syllable can be sorted into the final
sort order as listed at the beginning of stage 2.

The glyphs in the syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.


#### Stage 2, step 10: Flag sequences for possible feature applications ####

With the initial reordering complete, those glyphs in the syllable that
may have <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features applied in stages 3, 5, and 6 should be
flagged for each potential feature. 

This flagging is preliminary; the set of potential features varies
between different scripts and which features are supported varies
between fonts. It is also possible that the application of
one feature on a glyph sequence will perform a substitution that makes
a later feature no longer applicable to the updated sequence.

Consequently, the flagging must be completed before shaping proceeds
to the stages during which features are applied.

Some shaping features, such as `locl`, can potentially apply to any
glyphs. Therefore it is not necessary to maintain a separate flag for
these features in the bitmask (or other data structure) used to track
the flags -- although shaping engines may do so if desired.

The sequences to flag are summarized in the list below; a full
description of each feature's function and interpretation is provided
in the <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> application stages that follow.

  - `nukt` should match <samp>"_Consonant_,Nukta"</samp> sequences
  - `akhn` should match <samp>"Ka,Halant,Ssa"</samp> and <samp>"Ja,Halant,Nya"</samp>
  - `rphf` should match initial <samp>"Ra,Halant"</samp> sequences but _not_ match
            initial <samp>"Ra,Halant,ZWJ"</samp> sequences
  - `pref` should match <samp>"_Consonant_,Halant"</samp> in pre-base positions
  - `blwf` should match <samp>"Halant,La"</samp> in post-base positions and 
            <samp>"La,Halant"</samp> in non-initial pre-base positions
  - `half` should match <samp>"_Consonant_,Halant"</samp> in pre-base position but
           _not_ match <samp>"Ra,Halant"</samp> sequences flagged for `rphf` and
           _not_ match <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequences
  - `pstf` should match <samp>"Halant,Ya"</samp>, <samp>"Halant,Va"</samp>, and <samp>"Halant,Ra"</samp> in
            post-base position
  - `cjct` should match <samp>"_Consonant_,Halant,_Consonant_"</samp> but _not_
            match <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
            <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp>


### Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should be flagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2, step 10.

The order in which these substitutions must be performed is fixed for
all Indic scripts:

	locl
	nukt
	akhn
	rphf 
	rkrf (not used in Malayalam)
	pref 
	blwf 
	abvf (not used in Malayalam)
	half
	pstf
	vatu (not used in Malayalam)
	cjct
	cfar (not used in Malayalam)

#### Stage 3, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

#### Stage 3, step 2: nukt ####

The `nukt` feature replaces <samp>"_Consonant_,Nukta"</samp> sequences with a
precomposed nukta-variant of the consonant glyph. 

> Note: The Malayalam Unicode block does not include a Nukta
> codepoint, but Malayalam fonts may implement the `nukt` lookup using
> similar characters from other blocks.

  - The context defined for a `nukt` feature is:

:::{table} `nukt` feature context
    
| Backtrack     | Matching sequence             | Lookahead     |
|:--------------|:------------------------------|:--------------|
| _none_        | `_consonant_`(full),`_nukta_` | _none_        |
:::


:::{figure-md}
![Nukta composition](/images/malayalam/malayalam-nukt.svg "Nukta composition"){.shaping-demo .inline-svg .greyscale-svg #malayalam-nukt}

Nukta composition
:::

```{svg-color-toggle-button} malayalam-nukt
```


#### Stage 3, step 3: akhn ####

The `akhn` feature replaces specific sequences with required
ligatures. Malayalam differs from many other Indic scripts in that
there are typically many ligatures in a font that are implemented as
`akhn` substitutions.

These sequences can occur anywhere in a syllable. Therefore, this
feature must be applied before all other many-to-one substitutions.

  - The context defined for an `akhn` feature is:

:::{table} `akhn` feature context
    
| Backtrack     | Matching sequence           | Lookahead     |
|:--------------|:----------------------------|:--------------|
| _none_        | `AKHAND_CONSONANT_SEQUENCE` | _none_        |
:::


:::{figure-md}
![Akhand KSsa ligation](/images/malayalam/malayalam-akhn-kssa.svg "Akhand KSsa ligation"){.shaping-demo .inline-svg .greyscale-svg #malayalam-akhn-kssa}

Akhand KSsa ligation
:::

```{svg-color-toggle-button} malayalam-akhn-kssa
```


:::{figure-md}
![Akhand NnTta ligation](/images/malayalam/malayalam-akhn-nntta.svg "Akhand NnTta ligation"){.shaping-demo .inline-svg .greyscale-svg #malayalam-akhn-nntta}

Akhand NnTta ligation
:::

```{svg-color-toggle-button} malayalam-akhn-nntta
```


> Note: Modern Malayalam orthography prefers using the <samp>"Chillu R"</samp>
> instead of <samp>"Reph"</samp>. Therefore, Malayalam fonts may implement <samp>"Chillu
> R"</samp> as a substitution for <samp>"Ra,Halant"</samp> in the `akhn` feature. This
> ensures that the substitution takes place before the `rphf` feature
> is applied, so the font may omit the `rphf` feature entirely.

:::{figure-md}
![Akhand Chillu R ligation](/images/malayalam/malayalam-akhn-chillu-r.svg "Akhand Chillu R ligation"){.shaping-demo .inline-svg .greyscale-svg #malayalam-akhn-chillu-r}

Akhand Chillu R ligation
:::

```{svg-color-toggle-button} malayalam-akhn-chillu-r
```


#### Stage 3, step 4: rphf ####

The `rphf` feature replaces initial <samp>"Ra,Halant"</samp> sequences with the
<samp>"Reph"</samp> glyph.

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence, however, must not be flagged for
    the `rphf` substitution.
	
> Note: The <samp>"Dot Reph"</samp> substitution shown here is typically found only
> in old-orthography Malayalam writing.

:::{figure-md}
![Dot Reph composition](/images/malayalam/malayalam-dot-reph.svg "Dot Reph composition"){.shaping-demo .inline-svg .greyscale-svg #malayalam-dot-reph}

Dot Reph composition
:::

```{svg-color-toggle-button} malayalam-dot-reph
```


  - The context defined for a `rphf` feature is:

:::{table} `rphf` feature context
    
| Backtrack        | Matching sequence       | Lookahead     |
|:-----------------|:------------------------|:--------------|
| `SYLLABLE_START` | "Ra"(full),`_halant_`   | _none_        |
:::


> Note: Modern Malayalam orthography prefers using the <samp>"Chillu R"</samp>
> instead of <samp>"Reph"</samp>. Therefore, Malayalam fonts may implement <samp>"Chillu
> R"</samp> as a substitution for <samp>"Ra,Halant"</samp> in the `akhn` feature. This
> ensures that the substitution takes place before the `rphf` feature
> is applied, so the font may omit the `rphf` feature entirely.

:::{figure-md}
![Chillu R ligation](/images/malayalam/malayalam-akhn-chillu-r-1.svg "Chillu R ligation"){.shaping-demo .inline-svg .greyscale-svg #malayalam-akhn-chillu-r-1}

Chillu R ligation
:::

```{svg-color-toggle-button} malayalam-akhn-chillu-r-1
```


#### Stage 3, step 5: rkrf ####

> This feature is not used in Malayalam.

#### Stage 3, step 6: pref ####

The `pref` feature replaces pre-base-reordering consonant glyphs with
any special forms. Malayalam includes one such reordering consonant,
<samp>"Ra"</samp> when it occurs in post-base position.

The substitution of the nominal glyph for its special form takes place
at this stage. However, the actual reordering move is performed later,
in stage 4, step 4.

:::{figure-md}
![Pre-base Ra formation](/images/malayalam/malayalam-pstf-ra.svg "Pre-base Ra formation"){.shaping-demo .inline-svg .greyscale-svg #malayalam-pstf-ra}

Pre-base Ra formation
:::

```{svg-color-toggle-button} malayalam-pstf-ra
```


#### Stage 3, step 7: blwf ####

The `blwf` feature replaces below-base-consonant glyphs with any
special forms. Malayalam includes one consonant that can take on a
below-base form: <samp>"Halant,La"</samp>.

Because Malayalam incorporates the `BLWF_MODE_PRE_AND_POST` shaping
characteristic, any pre-base consonants and any post-base consonants
may potentially match a `blwf` substitution; therefore, both cases must
be flagged for comparison. Note that this is not necessarily the case in other
Indic scripts that use a different `BLWF_MODE_` shaping
characteristic. 

  - The context defined for a `blwf` feature is:

:::{table} `blwf` feature context
    
| Backtrack     | Matching sequence        | Lookahead     |
|:--------------|:-------------------------|:--------------|
| `_consonant_` | `_halant_`,"La"          | _none_        |
:::


:::{figure-md}
![Below-base La formation](/images/malayalam/malayalam-blwf-1.svg "Below-base La formation"){.shaping-demo .inline-svg .greyscale-svg #malayalam-blwf-1}

Below-base La formation
:::

```{svg-color-toggle-button} malayalam-blwf-1
```


#### Stage 3, step 8: abvf ####

> This feature is not used in Malayalam.

#### Stage 3, step 9: half ####

The `half` feature replaces <samp>"_Consonant_,Halant"</samp> sequences before the
base consonant or syllable base with "half forms" of the consonant
glyphs.

In the most common case, this substitution applies to
<samp>"_Consonant_,Halant"</samp> sequences that are followed by another
_Consonant_.

In addition, a sequence matching <samp>"_Consonant_,Halant,ZWJ"</samp> must also be
flagged for potential `half` substitutions.

> Note: The presence of the <samp>"ZWJ"</samp> at the end of the sequence means
> that the sequence may match the regular-expression test in stage 1
> as the end of a syllable, even without being followed by a base
> consonant or syllable base.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.

There are two exceptions to the default behavior, for which the
shaping engine must test:

  - Initial <samp>"Ra,Halant"</samp> sequences, which should have been flagged for
    the `rphf` feature earlier, must not be flagged for potential
    `half` substitutions.

  - A sequence matching <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be
    flagged for potential `half` substitutions.

> Note: Malayalam does not usually incorporate half forms, but it is
> possible for a font to implement them in order to provide for
> desired typographic variation.
>
> Note: Some `<mlm2>` fonts may use the `half` feature to implement
> Chillu substitutions, as in the example below.


:::{figure-md}
![Half-form formation](/images/malayalam/malayalam-half.svg "Half-form formation"){.shaping-demo .inline-svg .greyscale-svg #malayalam-half}

Half-form formation
:::

```{svg-color-toggle-button} malayalam-half
```


#### Stage 3, step 10: pstf ####

The `pstf` feature replaces post-base-consonant glyphs with any
special forms. Malayalam includes two consonants that can take on
post-base form: <samp>"Ya"</samp> and <samp>"Va"</samp>.

  - The context defined for a `pstf` feature is:

:::{table} `pstf` feature context
    
| Backtrack       | Matching sequence        | Lookahead     |
|:----------------|:-------------------------|:--------------|
| `SYLLABLE_BASE` | `_halant_`,`_consonant_` | _none_        |
:::


:::{figure-md}
![Post-base Ya formation](/images/malayalam/malayalam-pstf-ya-1.svg "Post-base Ya formation"){.shaping-demo .inline-svg .greyscale-svg #malayalam-pstf-ya-1}

Post-base Ya formation
:::

```{svg-color-toggle-button} malayalam-pstf-ya-1
```

:::{figure-md}
![Post-base Va formation](/images/malayalam/malayalam-pstf-va-1.svg "Post-base Va formation"){.shaping-demo .inline-svg .greyscale-svg #malayalam-pstf-va-1}

Post-base Va formation
:::

```{svg-color-toggle-button} malayalam-pstf-va-1
```


#### Stage 3, step 11: vatu ####

> This feature is not used in Malayalam.

#### Stage 3, step 12: cjct ####

The `cjct` feature replaces sequences of adjacent consonants with
conjunct ligatures. These sequences must match <samp>"_Consonant_,Halant,_Consonant_"</samp>.

A sequence matching <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
<samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be flagged to form a conjunct.

> Note: The presence of the <samp>"ZWJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> sequence should automatically
> inhibit any `cjct` feature rules from matching the sequence as valid
> input, and thus prevent the `cjct` substitution from being applied.

> Note: The presence of the <samp>"ZWNJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequence means that the
> <samp>"_Consonant_,Halant,ZWNJ"</samp> subsequence will match the
> regular-expression test in stage 1 as the end of a syllable.
> 
> Because OpenType shaping features in `<mlm2>` are defined as
> applying only within an individual syllable, this means that the
> presence of the <samp>"ZWNJ"</samp> will automatically prevent the application of
> a `cjct` feature by triggering the identification of a syllable
> break between the two consonants.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWNJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.
>
> Note, also: The presence of the <samp>"ZWJ"</samp> means that a
> <samp>"_Consonant_,Halant,ZWJ"</samp> sequence may match the regular-expression
> test in stage 1 as the end of a syllable, even without being
> followed by a base consonant or syllable base. By definition,
> however, a <samp>"_Consonant_,Halant,ZWJ"</samp> syllable identified in stage 1
> cannot also include a <samp>"_Consonant_"</samp> after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.

The font's <abbr title="Glyph Substitution table">GSUB</abbr> rules might be implemented so that `cjct`
substitutions apply to half-form consonants; therefore, this feature
must be applied after the `half` feature. 

> Note: Malayalam does not usually incorporate conjunct forms, but it is
> possible for a font to implement them in order to provide for
> desired typographic variation.

:::{figure-md}
![Conjunct ligation](/images/malayalam/malayalam-cjct.svg "Conjunct ligation"){.shaping-demo .inline-svg .greyscale-svg #malayalam-cjct}

Conjunct ligation
:::

```{svg-color-toggle-button} malayalam-cjct
```


#### Stage 3, step 13: cfar ####

> This feature is not used in Malayalam.


### Stage 4: Final reordering ###

The final reordering stage repositions marks, dependent-vowel (matra)
signs, and <samp>"Reph"</samp> glyphs to the appropriate location with respect to
the base consonant or syllable base. Because multiple substitutions
may have occurred during the application of the basic-shaping features
in the preceding stage, these repositioning moves could not be
performed during the initial reordering stage.

Like the initial reordering stage, the steps involved in this stage
occur on a per-syllable basis.

<!--- Check that classifications have not been mangled. If the -->
<!--character is a Halant AND a ligature was formed AND a multiple
substitution was performed, restore the classification to VIRAMA
because it was almost certainly lost in the preceding <abbr title="Glyph Substitution table">GSUB</abbr> stage.
--->

#### Stage 4, step 1: Base consonant ####

The final reordering stage, like the initial reordering stage, begins
with determining the syllable base of each syllable, following the
same algorithm used in stage 2, step 1.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base. In a standalone sequence or
other syllable that begins with a placeholder or a dotted circle, the
placeholder or dotted circle will always serve as the syllable base.

In a syllable that begins with a consonant, the shaping engine must
repeat the base-consonant search algorithm used in stage 2, step 1.

The codepoint of the underlying base consonant or syllable base will
not change between the search performed in stage 2, step 1, and the
search repeated here. However, the application of <abbr title="Glyph Substitution table">GSUB</abbr> shaping
features in stage 3 means that several ligation and many-to-one
substitutions may have taken place. The final glyph produced by that
process may, therefore, be a conjunct or ligature form — in most
cases, such a glyph will not have an assigned Unicode codepoint.
   
#### Stage 4, step 2: Pre-base matras ####

Pre-base dependent vowels (matras) that were reordered during the
initial reordering stage must be moved to their final position. This
position is defined as:

   - after all <samp>"Chillu"</samp> glyphs
   - after the last standalone <samp>"Halant"</samp> glyph that comes after the
     matra's starting position and also comes before the main
     consonant.
   - If a zero-width joiner follows this last standalone <samp>"Halant"</samp>, the
     final matra position is moved to after the joiner.

This means that the matra will move to the right of all explicit
<samp>"_Consonant_,Halant"</samp> subsequences and all glyphs that resulted from a
substitution on a <samp>"_Consonant_,Halant,ZWJ"</samp> subsequence, but will stop
to the left of the base consonant or syllable base, and all conjuncts
or ligatures that contain the base consonant or syllable base.

:::{figure-md}
![Matra positioning](/images/malayalam/malayalam-matra-position.svg "Matra positioning"){.shaping-demo .inline-svg .greyscale-svg #malayalam-matra-position}

Matra positioning
:::

```{svg-color-toggle-button} malayalam-matra-position
```


> Note: OpenType and Unicode both state that if the syllable includes
> a <abbr title="Zero-Width Joiner">ZWJ</abbr> immediately after the last <samp>"Halant"</samp>, then the final matra
> position should be after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.
>
> However, there are several test sequences indicating that
> Microsoft's Uniscribe shaping engine did not follow this rule (in,
> at least, Devanagari and Bengali text), and in these circumstances
> Uniscribe instead makes the final matra position before the final
> <samp>"Consonant,Halant,ZWJ"</samp>.
>
> Subsequently, the HarfBuzz shaping engine has also followed the same
> pattern. If other shaping engine implementations prefer to maintain
> maximum compatibility with Uniscribe and HarfBuzz, then they should
> also follow suit.

> Note: The Microsoft script-development specifications for OpenType
> shaping also state that if a zero-width non-joiner follows the last
> standalone <samp>"Halant"</samp>, the final matra position is moved to after the
> non-joiner. However, it is unnecessary to test for this condition,
> because a <samp>"Halant,ZWNJ"</samp> subsequence is, by definition, the end of a
> syllable. Consequently, a <samp>"Halant,ZWNJ"</samp> cannot be followed by a
> pre-base dependent vowel.


#### Stage 4, step 3: Reph ####

<samp>"Reph"</samp> or <samp>"Repha"</samp> must be moved from the beginning of the syllable to its final
position. Because Malayalam incorporates the `REPH_POS_AFTER_MAIN`
shaping characteristic, this final position is defined as immediately
after the syllable base.

The algorithm for finding the final <samp>"Reph"</samp> position is:

  - Move the <samp>"Reph"</samp> to the position immediately before
    the first post-base matra, syllable modifier, or Vedic sign that
    has a positioning tag after the script's <samp>"Reph"</samp> position in the
    syllable sort order (as listed in [stage
    2](#stage-2-initial-reordering)). This will be the final <samp>"Reph"</samp>
    position. 
	> Note: Because Malayalam incorporates the
    > `REPH_POS_AFTER_MAIN` shaping characteristic, this means
    > any positioning tag of `POS_ABOVEBASE_CONSONANT` or later,
    > although a post-base matra, syllable modifier, or Vedic sign
    > would not typically be tagged with `POS_ABOVEBASE_CONSONANT`.
  - If no other location has been located in the previous step, move
    the <samp>"Reph"</samp> to the end of the syllable.

Finally, if the final position of <samp>"Reph"</samp> or <samp>"Repha"</samp> occurs after a
<samp>"_matra_,Halant"</samp> subsequence, then <samp>"Reph"</samp>/<samp>"Repha"</samp> must be repositioned to the
left of <samp>"Halant"</samp>, to allow for potential matching with `abvs` or
`psts` substitutions from <abbr title="Glyph Substitution table">GSUB</abbr>.

:::{figure-md}
![Repha positioning](/images/malayalam/malayalam-repha-position.svg "Repha positioning"){.shaping-demo .inline-svg .greyscale-svg #malayalam-repha-position}

Repha positioning
:::

```{svg-color-toggle-button} malayalam-repha-position
```


#### Stage 4, step 4: Pre-base-reordering consonants ####

Any pre-base-reordering consonants must be moved to before
the base consonant or syllable base.

Malayalam includes one such reordering consonant. <samp>"Ra"</samp> occurring in the
post-base position is reordered to a pre-base position at this step.

The algorithm for reordering <samp>"Ra"</samp> in this circumstance is:

  - Only reorder the <samp>"Ra"</samp> if the current glyph was substituted using
    the `pref` feature in stage 3, step 6.
  - Select the final position using [the same method](#stage-4-step-2-pre-base-matras) as used for
    reordering a pre-base matra.
  - If the pre-base matra positioning algorithm cannot determine the final
    position, place the <samp>"Ra"</samp> immediately before the base consonant or syllable base.

:::{figure-md}
![Pre-base-reordering consonant positioning](/images/malayalam/malayalam-pref-position.svg "Pre-base-reordering consonant positioning"){.shaping-demo .inline-svg .greyscale-svg #malayalam-pref-position}

Pre-base-reordering consonant positioning
:::

```{svg-color-toggle-button} malayalam-pref-position
```


#### Stage 4, step 5: Initial matras ####

Any left-side dependent vowels (matras) that are at the start of a
word must be flagged for potential substitution by the `init` feature
of <abbr title="Glyph Substitution table">GSUB</abbr>.

Malayalam does not use the `init` feature, so this step will
involve no work when processing `<mlm2>` text. It is included here in
order to maintain compatibility with the other Indic scripts.


### Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr> ###

In this stage, the remaining substitution features from the <abbr title="Glyph Substitution table">GSUB</abbr> table
are applied. In preparation for this stage, glyph sequences should be
flagged for possible application of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2,
step 10.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	init (not used in Malayalam)
	pres
	abvs
	blws
	psts
	haln

The `init` feature is not used in Malayalam.

The `pres` feature replaces pre-base-consonant glyphs with special
presentation forms. This can include consonant conjuncts, half-form
consonants, and stylistic variants of left-side dependent vowels
(matras). 

The `abvs` feature replaces above-base-consonant glyphs with special
presentation forms. This usually includes contextual variants of
above-base marks or contextually appropriate mark-and-base ligatures.

The `blws` feature replaces below-base-consonant glyphs with special
presentation forms. This usually includes replacing base consonants or
syllable bases that
are adjacent to the below-base-consonant form of <samp>"La"</samp> with contextual ligatures.

The `psts` feature replaces post-base-consonant glyphs with special
presentation forms. This usually includes replacing right-side
dependent vowels (matras) with stylistic variants or replacing
post-base-consonant/matra pairs with contextual ligatures. 

:::{figure-md}
![Post-base form substitution](/images/malayalam/malayalam-psts.svg "Post-base form substitution"){.shaping-demo .inline-svg .greyscale-svg #malayalam-psts}

Post-base form substitution
:::

```{svg-color-toggle-button} malayalam-psts
```


The `haln` feature replaces syllable-final <samp>"_Consonant_,Halant"</samp> pairs with
special presentation forms. This can include stylistic variants of the
consonant where placing the <samp>"Halant"</samp> mark on its own is
typographically problematic. 

> Note: Some `<mlm2>` fonts may use the `haln` feature to implement
> Chillu substitutions, as in the example below.

:::{figure-md}
![Halant-form formation](/images/malayalam/malayalam-haln.svg "Halant-form formation"){.shaping-demo .inline-svg .greyscale-svg #malayalam-haln}

Halant-form formation
:::

```{svg-color-toggle-button} malayalam-haln
```


> Note: The `calt` feature, which allows for generalized application
> of contextual alternate substitutions, is usually applied at this
> point. However, `calt` is not mandatory for correct Malayalam shaping
> and may be disabled in the application by user preference.

### Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied.

As with the preceding stage, the order in which these features are
applied is not canonical; they should be applied in the order in which
they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

        dist
        abvm
        blwm

> Note: The `kern` feature is usually applied at this stage, if it is
> present in the font. However, `kern` (like `calt`, above) is not
> mandatory for shaping Malayalam text and may be disabled by user preference.

The `dist` feature adjusts the horizontal positioning of
glyphs. Unlike `kern`, adjustments made with `dist` do not require the
application or the user to enable any software _kerning_ features, if
such features are optional. 

The `abvm` feature positions above-base marks for attachment to base
characters. In Malayalam, this includes <samp>"Dot Reph"</samp> in addition to the
diacritical marks and Vedic signs. 

:::{figure-md}
![Above-base mark positioning](/images/malayalam/malayalam-abvm.svg "Above-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #malayalam-abvm}

Above-base mark positioning
:::

```{svg-color-toggle-button} malayalam-abvm
```


The `blwm` feature positions below-base marks for attachment to base
characters. In Malayalam, this includes below-base marks as well as
the below-base consonant form of <samp>"La"</samp>.

:::{figure-md}
![Below-base mark positioning](/images/malayalam/malayalam-blwm.svg "Below-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #malayalam-blwm}

Below-base mark positioning
:::

```{svg-color-toggle-button} malayalam-blwm
```


## The `<mlym>` shaping model ##

The older Malayalam script tag, `<mlym>`, has been deprecated. However,
shaping engines may still encounter fonts that were built to work with
`<mlym>` and some users may still have documents that were written to
take advantage of `<mlym>` shaping.

### Distinctions from `<mlm2>` ###

The most significant distinction between the shaping models is that the
sequence of <samp>"Halant"</samp> and consonant glyphs used to trigger shaping
features was altered when migrating from `<mlym>` to
`<mlm2>`. 

Specifically, under `<mlym>`, shaping engines were expected to reorder
post-base <samp>"Halant,_Consonant_"</samp> sequences to <samp>"_Consonant_,Halant"</samp>.

As a result, a font's <abbr title="Glyph Substitution table">GSUB</abbr> substitutions would be written to match
<samp>"_Consonant_,Halant"</samp> sequences in all pre-base and post-base positions.


The `<mlym>` syllable

	Pre-baseC Halant BaseC Halant Post-baseC

would be reordered to

	Pre-baseC Halant BaseC Post-baseC Halant

before features are applied.

In `<mlm2>` text, as described above in this document, there is no
such reordering. The correct sequence to match for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions is
<samp>"_Consonant_,Halant"</samp> for pre-base consonants, but <samp>"Halant,_Consonant_"</samp>
for post-base consonants.

The old Indic shaping model also did not recognize the
`BLWF_MODE_PRE_AND_POST` shaping characteristic. Instead, `<mlym>`
was treated as if it followed the `BLWF_MODE_POST_ONLY`
characteristic. In other words, below-base form substitutions were
only applied to consonants after the base consonant or syllable base.

In addition, for some scripts, left-side dependent vowel marks
(matras) were not repositioned during the final reordering
stage. For `<mlym>` text, the left-side matra was always positioned
immediately before the base consonant or syllable base.


### Advice for handling fonts with `<mlym>` features only ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences in order to apply <abbr title="Glyph Substitution table">GSUB</abbr> substitutions when it is known that
the font in use supports only the `<mlym>` shaping model.


### Advice for handling text runs composed in `<mlym>` format ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions or to reorder them to
<samp>"Halant,_Consonant_"</samp> when processing text runs that are tagged with
the `<mlym>` script tag and it is known that the font in use supports
only the `<mlm2>` shaping model.

Shaping engines may also choose to apply `blwf` substitutions to
below-base consonants occurring before the base consonant or syllable base when it is
known that the font in use supports an applicable substitution lookup.

Shaping engines may also choose to position left-side matras according
to the `<mlym>` ordering scheme; however, doing so might interfere
with matching <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features.


================================================
FILE: opentype-shaping-mongolian.md
================================================
```{include} /_global.md
```

# Mongolian script shaping in OpenType #

This document details the general shaping procedure shared by all
Mongolian script styles, and defines the common pieces that style-specific
implementations share. 


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Joining properties](#joining-properties)
	  - [Mark classification](#mark-classification)
	  - [Character tables](#character-tables)
  - [The `<mong>` shaping model](#the-mong-shaping-model)
      - [Stage 1: Transient reordering of modifier combining marks](#stage-1-transient-reordering-of-modifier-combining-marks)
      - [Stage 2: Compound character composition and decomposition](#stage-2-compound-character-composition-and-decomposition)
      - [Stage 3: Computing letter joining states](#stage-3-computing-letter-joining-states)
      - [Stage 4: Applying the `stch` feature](#stage-4-applying-the-stch-feature)
      - [Stage 5: Applying the language-form substitution features from <abbr>GSUB</abbr>](#stage-5-applying-the-language-form-substitution-features-from-gsub)
      - [Stage 6: Applying the typographic-form substitution features from <abbr>GSUB</abbr>](#stage-6-applying-the-typographic-form-substitution-features-from-gsub)
      - [Stage 7: Applying the positioning features from <abbr>GPOS</abbr>](#stage-7-applying-the-positioning-features-from-gpos)
  

## General information ##

The Mongolian script is used to write multiple languages, most commonly
Mongolian, Sibe (or Xibe), and Manchu.  In addition, extensions to the
character set may be used to write Tibetan and Sanskrit. 

The classical Mongolian alphabet includes several letters that differ
phonetically but are identical in their visual appearance, such as "O"
(`U+1823`, &#x1823;) and "U" (`U+1824`, &#x1824;). A variant of the
classical alphabet, called Todo (or "clear") Mongolian, was developed
in the 17th Century to remove such ambiguous forms. The Todo
characters are also included in the Mongolian Unicode block.

Due to the common shaping features that the Mongolian script shares
with Arabic, a shaping engine can support Mongolian with the same
shaping model [used for Arabic and related writing systems](opentype-shaping-arabic-general.md).

However, several other, unrelated scripts are also used to write
Mongolian, including 'Phags-Pa, Soyombo, Zanabazar Square, Cyrillic, and
Latin. Each of these scripts has its own OpenType shaping rules and its
own Unicode block, and does not use the general Arabic shaping model.

Mongolian is a joining script that uses inter-word spaces, so each
codepoint in a text run may be substituted with one of several
contextual forms corresponding to what, if any, characters appear
before and after the codepoint. Most, but not all, letter sequences
join; shaping engines must track which positions trigger joining
behavior for each letter. 

Mongolian is normally written (and, therefore, rendered) vertically,
from top to bottom. Isolated words or short phrases in Mongolian that
are included in text blocks of horizontal scripts are generally
rotated 90 degrees counterclockwise, so that the letters run
left-to-right. On systems that do not support vertical text setting,
this left-to-right rendering is a common fallback strategy for full
runs of Mongolian text.


## Terminology ##

OpenType shaping uses a standard set of terms for elements of the
Mongolian script. The terms used colloquially in any particular language
may vary, however, potentially causing confusion.

**Base** glyph or character is the standard term for a Mongolian
character that is capable of taking a diacritical mark. 

All consonants and vowels are base characters in
Mongolian. Diacritical marks are not used in the Mongolian, Sibe, or
Manchu languages, but may be encountered in Tibetan or Sanskrit.

A number of consonants in Mongolian take on different forms depending
on the vowels used elsewhere in the word. In addition, some letters
take on different forms depending on whether they occur in the
first syllable of a word or whether they are used in a native
Mongolian word versus a foreign word. Mongolian fonts implement
substitutions capturing most of these form rules using <abbr title="Glyph Substitution table">GSUB</abbr>. However,
there are occasions where the correct form may not be determined from
context alone.

To indicate the correct form, the text run can include a **free
variation selector** immediately after the letter in
question. There are four free variation selectors in the Mongolian
block ("FVS1", "FVS2", "FVS3", and "FVS4"), although some letters have
alternate forms defined only for a subset of the free variation
selectors.

In addition, letters vary as to whether alternate forms exist for the
isolated, initial, medial, or final position, or for several
positions. The forms that each selector triggers for each letter are
defined in the Unicode Mongolian block.

For example, the letter "Manchu I" (`U+1873`) has three alternate
forms defined for the medial position:

:::{figure-md}
![Non FVS form substitution](/images/mongolian/mongolian-fvs-none.svg "Non FVS form substitution"){.shaping-demo .inline-svg .greyscale-svg #mongolian-fvs-none}

Non-FVS substitution
:::

```{svg-color-toggle-button} mongolian-fvs-none
```

:::{figure-md}
![FVS1 form substitution](/images/mongolian/mongolian-fvs-fvs1.svg "FVS1 form substitution"){.shaping-demo .inline-svg .greyscale-svg #mongolian-fvs-fvs1}

FVS1 form substitution
:::

```{svg-color-toggle-button} mongolian-fvs-fvs1
```

:::{figure-md}
![FVS2 form substitution](/images/mongolian/mongolian-fvs-fvs2.svg "FVS2 form substitution"){.shaping-demo .inline-svg .greyscale-svg #mongolian-fvs-fvs2}

FVS2 form substitution
:::

```{svg-color-toggle-button} mongolian-fvs-fvs2
```

:::{figure-md}
![FVS3 form substitution](/images/mongolian/mongolian-fvs-fvs3.svg "FVS3 form substitution"){.shaping-demo .inline-svg .greyscale-svg #mongolian-fvs-fvs3}

FVS3 form substitution
:::

```{svg-color-toggle-button} mongolian-fvs-fvs3
```


Free variation selectors have no visual appearance and no advance
width; they are used only to trigger the proper substitution in the
active font's <abbr title="Glyph Substitution table">GSUB</abbr> tables. 


In some Mongolian words, a word-final "A" or "E" is written
disconnected from the preceding letter. In such situations, the
**Mongolian vowel separator** formatting character can be included
between the two letters to trigger this separation.

Sibe text may use the **Sibe syllable boundary marker** (`U+1807`) to
denote syllable boundaries in foreign loanwords.

**Kashida** (or **tatweel**) is the Arabic term for a glyph inserted
into a sequence for the purpose of elongating the baseline stroke of
an Arabic letter. Mongolian features a similar character, called
**nirugu**, in the Mongolian Unicode block.


## Glyph classification ##

Because Mongolian is a joining (or cursive) script, proper shaping of
text runs involves identifying the joining behavior of each character,
then combining that information with any preceding or subsequent
characters to determine the contextually correct form for display.

### Joining properties ###

Mongolian characters are assigned a `JOINING_TYPE` property in the
Unicode standard that indicates how they join to adjacent
characters. There are six possible values: 

  - `JOINING_TYPE_LEFT` indicates that a character joins with
    the subsequent character, but does not join with the preceding
    character. 
	
  - `JOINING_TYPE_RIGHT` indicates that a character joins with the
    preceding character, but does not join with the subsequent character.	

  - `JOINING_TYPE_DUAL` indicates that a character joins with the
    preceding character and joins with the subsequent character.
	
  - `JOINING_TYPE_NON_JOINING` indicates that a character does not
    join with the preceding or with the subsequent character.
	
  - `JOINING_TYPE_TRANSPARENT` indicates that the character does not
    join with adjacent characters _and_ that the character must be
    skipped over when the shaping engine is evaluating the joining
    positions in a sequence of characters. When a
    `JOINING_TYPE_TRANSPARENT` character is encountered in a sequence,
    the `JOINING_TYPE` of the preceding character passes
    through. Diacritical marks are frequently assigned this value. 
	
  - `JOINING_TYPE_JOIN_CAUSING` indicates that the character forces
    the use of joining forms with the preceding and subsequent
    characters. Kashidas and the Zero Width Joiner (`U+200D`) are both
    `JOIN_CAUSING` characters.
  
  
> Note: Almost all characters in Mongolian are of joining type
> `DUAL`. The exceptions are `TRANSPARENT`, `NON_JOINING`, and
> `JOIN_CAUSING`. Thus, the ambiguity that might be encountered due to
> the usage of `LEFT` and `RIGHT` in the names of the other joining
> types (which are so named in reference to the relative positions as
> used in Arabic and related scripts) is avoided.

In other scripts using the Arabic shaping model, letters are also
assigned to a `JOINING_GROUP` that indicates which fundamental
character they behave like with regard to joining behavior. Mongolian,
however, does not use joining groups; all characters are assigned to
the _null_ joining group.


### Mark classification ###

The Unicode standard defines a _canonical combining class_ for each
codepoint that is used whenever a sequence needs to be sorted into
canonical order. 

Only one Mongolian mark belongs to a standard combining
class:

:::{table} Mark-classification table

| Codepoint | Combining class | Glyph                              |
|:----------|:----------------|:-----------------------------------|
|`U+18A9`   | 228             | &#x18A9; Ali Gali Dagalga          |
:::


All other codepoints in the Mongolian block belong to class _0_.

The numeric values of these combining classes are used during Unicode
normalization.
	
			
### Character tables ###

Separate character tables are provided for the Mongolian and Mongolian
Supplement blocks, as well as for other miscellaneous
characters that are used in `<mong>` text runs:

  - [Mongolian character table](character-tables/character-tables-mongolian.md#mongolian-character-table)
  - [Mongolian Supplement character table](character-tables/character-tables-mongolian.md#mongolian-supplement-character-table)
  - [Miscellaneous character table](character-tables/character-tables-mongolian.md#miscellaneous-character-table)


The tables list each codepoint along with its Unicode general
category and its joining type. For letters, the table lists the
codepoint's joining group. For diacritical marks, the table lists the
codepoint's mark combining class. The codepoint's Unicode name and an example
glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Joining type | Joining group | Mark class | Glyph                        |
|:----------|:-----------------|:-------------|:--------------|:-----------|:-----------------------------|
|`U+1828`   | Letter           | DUAL         | _null_        | _0_        | &#x1828; Na                  |
| | | | | |
|`U+1885`   | Mark [Mn]        | TRANSPARENT  | _null_        | _0_        | &#x1885; Ali Gali Baluda     |
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of Mongolian text include the dotted-circle placeholder (`U+25CC`),
the zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`),
the no-break space (`U+00A0`) and the narrow no-break space (`U+202F`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
combining mark in isolation. Real-world text documents may
also use other characters, such as hyphens or dashes, in a similar
placeholder fashion; shaping engines should cope with this situation
gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (which is not common in Mongolian but can occur in
    other scripts)


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to force the usage of the
cursive connecting form of a letter even when the context of the
adjoining letters would not trigger the connecting form. 

For example, to show the initial form of a letter in isolation (such
as for displaying it in a table of forms), the sequence <samp>"_Letter_,ZWJ"</samp>
would be used. To show the medial form of a letter in isolation, the
sequence <samp>"ZWJ,_Letter_,ZWJ"</samp> would be used.

The zero-width non-joiner (<abbr>ZWNJ</abbr>) is primarily used to prevent a
cursive connection between two adjacent characters that would, under
normal circumstances, form a join. 

The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.

The no-break space is primarily used to display those codepoints that
are defined as non-spacing (such as diacritical marks) in an
isolated context, as an alternative to displaying them superimposed on
the dotted-circle placeholder.

The narrow no-break space serves a different function in Mongolian. It
is used to visually separate the main body of a word from the word's
suffix. Not all Mongolian words incorporate a narrow no-break space.


## The `<mong>` shaping model ##

Processing a run of `<mong>` text involves seven top-level stages:

1. Transient reordering of modifier combining marks
2. Compound character composition and decomposition
3. Computing letter joining states
4. Applying the `stch` feature
5. Applying the language-form substitution features from <abbr>GSUB</abbr>
6. Applying the typographic-form substitution features from <abbr>GSUB</abbr>
7. Applying the positioning features from <abbr>GPOS</abbr>


### Stage 1: Transient reordering of modifier combining marks ###

<!--- http://www.unicode.org/reports/tr53/tr53-1.pdf --->
> Note: because Mongolian does not feature the "Shadda" mark or any
> marks that belong to _Modifier Combining Marks_ (<abbr>MCM</abbr>) classes, this
> stage should not involve any additional work when processing
> `<mong>` text runs. It is included here to maintain consistency with
> other scripts that utilize the general Arabic-based shaping model.

Sequences of adjacent marks must be reordered so that they appear in
the appropriate visual order before the mark-to-base and mark-to-mark
positioning features from <abbr title="Glyph Positioning table">GPOS</abbr> can be correctly applied.

In particular, those marks that have strong affinity to the base
character must be placed closest to the base.

This mark-reordering operation is distinct from the standard,
cross-script mark-reordering performed during Unicode
normalization. The standard Unicode mark-reordering algorithm is based
on comparing the _Canonical_Combining_Class_ (<abbr>Ccc</abbr>) properties of mark
codepoints, whereas this script-specific reordering utilizes the
_Modifier_Combining_Mark_ (<abbr>MCM</abbr>) subclasses specified in the
character tables.

The algorithm for reordering a sequence of marks is:

  - First, move any <samp>"Shadda"</samp> (combining class `33`) characters to the
    beginning of the mark sequence.
	
  -	Second, move any subsequence of combining-class-`230` characters that begins
       with a `230_MCM` character to the beginning of the sequence,
       before all <samp>"Shadda"</samp> characters. The subsequence must be moved
       as a group.

  - Finally, move any subsequence of combining-class-`220` characters that begins
       with a `220_MCM` character to the beginning of the sequence,
       before all <samp>"Shadda"</samp> characters and before all class-`230`
       characters. The subsequence must be moved as a group.

> Note: Unicode describes this mark-reordering operation, the Arabic
> Mark Transient Reordering Algorithm (<abbr>AMTRA</abbr>), in Technical Report 53,
> which describes it in terms that are distinct from standard,
> <abbr>Ccc</abbr>-based mark reordering.
>
> Specifically, <abbr title="Arabic Mark Transient Reordering Algorithm">AMTRA</abbr> is designated as an operation performed during
> text rendering only, which therefore does not impact other
> Unicode-compliance issues such as allowable input sequences or text
> encoding.
>
> However, shaping engines may choose to perform the reordering of
> modifier combining marks in conjunction with their Unicode
> normalization functionality for increased efficiency.

### Stage 2: Compound character composition and decomposition ###

The `ccmp` feature allows a font to substitute

  - mark-and-base sequences with a pre-composed glyph including both
    the mark and the base (as is done with a ligature substitution)
	
  - individual compound glyphs with the equivalent sequence of
    decomposed glyphs (such as decomposing a letter with inherent
    marks into a separate fundamental-letter glyph followed by a
    marks-only glyph, to permit more precise positioning)
 
If present, these composition and decomposition substitutions must be
performed before applying any other <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups, because
those lookups may be written to match only the `ccmp`-substituted
glyphs. 


### Stage 3: Computing letter joining states ###

In order to correctly apply the initial, medial, and final form
substitutions from <abbr title="Glyph Substitution table">GSUB</abbr> during stage 5, the shaping engine must
tag every letter for possible application of the appropriate feature.

> Note: The following algorithm includes rules for processing `<syrc>`
> text in addition to `<mong>` text. Implementers concerned only with
> shaping `<mong>` text can omit the portions for `<syrc>`-specific
> rules. 

To determine which feature is appropriate, the shaping engine must
examine each word in turn and compute each letter's joining state from
the letter's `JOINING_TYPE` and the `JOINING_TYPE` of the
preceding character (if any).

> Note: Although Mongolian uses inter-word spaces, the `init` feature
> does _not_ refer to word-initial letters only and the `fina` feature
> does _not_ refer to word-final letters only.
>
> Rather, both of these terms are defined with respect to whether or
> not the preceding and subsequent letters form joins with the current
> letter. The letters at word boundaries will, naturally, take on
> initial and final forms, but initial and final forms of letters also
> occur regularly within words, when the letter in question is
> adjacent to a letter that does not form joins.

This computation starts from the first letter of the word, temporarily
tagging the letter for `isol` substitution. If the first
letter is the only letter in the word, the `isol` tag will remain unchanged.

From here, the algorithm consumes each character in the string, one at
a time, keeping track of the JOINING_TYPE of the previous character. 

If the current character is JOINING_TYPE_TRANSPARENT, move on to the next
character but preserve the currently-tracked JOINING_TYPE at its previous state.

If the preceding character's JOINING_TYPE is LEFT, DUAL, or
JOIN_CAUSING:
  - In `<syrc>` text, if the current character is <samp>"Alaph"</samp>, tag the
    current character for `med2`, then update the tag for the
    preceding character:
	  - `isol` becomes `init`
	  - `fina` becomes `medi`
	  - `init` remains `init`
	  - `medi` remains `medi`
  - If the current character's JOINING_TYPE is RIGHT, DUAL, or
    JOIN_CAUSING, tag the current character for `fina`, then update
    the tag for the preceding character:
	  - `isol` becomes `init`
	  - `fina` becomes `medi`
	  - `init` remains `init`
	  - `medi` remains `medi`

Otherwise, tag the current character for `isol`.

After testing the final character of the word, if the text is in `<syrc>` and
if the last character that is not JOINING_TYPE_TRANSPARENT or
JOINING_TYPE_NON_JOINING is <samp>"Alaph"</samp>, perform an additional test:
  - If the preceding character is JOINING_TYPE_LEFT, tag the current character
    for `fina`
  - If the preceding character's JOINING_GROUP is DALATH_RISH, tag the current
    character for `fin3`
  - Otherwise, tag the current character for `fin2`


Once the last character of the word has been processed, proceed to the
next word and repeat the algorithm, starting at the beginning of the
next word.

> Note: Because the processing of the characters in the algorithm
> described above is deterministic, shaping engines may choose to
> implement the joining-state computation as a state machine, in a lookup
> table, or by any other means desirable.

At the end of this process, all letters should be tagged for possible
substitution by one of the `isol`, `init`, `medi`, `med2`, `fina`, `fin2`, or
`fin3` features.

### Stage 4: Applying the `stch` feature ###

The `stch` feature decomposes and stretches special marks that are
meant to extend to the full width of words to which they are
attached. It was defined for use in `<syrc>` text runs for the <samp>"Syriac
Abbreviation Mark"</samp> (`U+070F`) but it can be used with similar marks in
other scripts.

To apply the `stch` feature, the shaping engine should first decompose the
`U+070F` glyph into components, which results in beginning-point,
midpoint, and endpoint glyphs plus one (or more) extension glyphs: at
least one extension between the beginning and midpoint glyphs and at
least one extension between the midpoint and endpoint glyphs. 

The shaping engine must then calculate the total length of the word to
which the mark applies. That length, minus the advance widths of the
beginning, middle, and endpoint glyphs of the mark, must be divided by
two. 

The result, divided by the advance width of the extension glyph
and rounded up to the next integer, tells the shaping engine how many
copies of the extension glyph must be placed between the midpoint and
each end of the mark.

Following this procedure ensures that the same number of extensions is
used on each side of the mark so that it remains symmetrical.

Finally, the decomposed mark must be reordered as follows: 

  - All of the glyphs in the sequence for the mark, _except_ for
    the final glyph, are repositioned as a group so that they precede
    the word to which the mark is attached.
  - The final glyph in the mark sequence is repositioned to the end of
    the word.
	

### Stage 5: Applying the language-form substitution features from <abbr>GSUB</abbr> ###

The language-substitution phase applies mandatory substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for
this stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

The order in which these substitutions must be performed is fixed for
all scripts implemented in the Arabic shaping model:

	locl
	isol
	fina
	fin2 (not used in <mong>)
	fin3 (not used in <mong>)
	medi
	med2 (not used in <mong>)
	init
	rlig
	rclt
	calt
	
> Note: `rlig` and `calt` need to be applied to the word as a whole before
> continuing to the next feature.

#### Stage 5, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.


#### Stage 5, step 2: isol ####

The `isol` feature substitutes the default glyph for a codepoint with
the isolated form of the letter.

> Note: It is common for a font to use the isolated form of a letter
> as the default, in which case the `isol` feature would apply no
> substitutions. However, this is only a convention, and the active
> font may use other forms as the default glyphs for any or all
> codepoints.

:::{figure-md}
![Isolated form substitution](/images/mongolian/mongolian-isol.svg "Isolated form substitution"){.shaping-demo .inline-svg .greyscale-svg #mongolian-isol}

Isolated form substitution
:::

```{svg-color-toggle-button} mongolian-isol
```


The Mongolian free-variation selectors can also be used in conjunction
with `isol` to trigger alternate forms of certain letters as required
by the orthography.

:::{figure-md}
![Isolated FVS1 form substitution](/images/mongolian/mongolian-isol-fvs1.svg "Isolated FVS1 form substitution"){.shaping-demo .inline-svg .greyscale-svg #mongolian-isol-fvs1}

Isolated FVS1 form substitution
:::

```{svg-color-toggle-button} mongolian-isol-fvs1
```


#### Stage 5, step 3: fina ####

The `fina` feature substitutes the default glyph for a codepoint with
the terminal (or final) form of the letter.

:::{figure-md}
![Final form substitution](/images/mongolian/mongolian-fina.svg "Final form substitution"){.shaping-demo .inline-svg .greyscale-svg #mongolian-fina}

Final form substitution
:::

```{svg-color-toggle-button} mongolian-fina
```


The Mongolian free-variation selectors can also be used in conjunction
with `fina` to trigger alternate forms of certain letters as required
by the orthography.

:::{figure-md}
![Final FVS2 form substitution](/images/mongolian/mongolian-fina-fvs2.svg "Final FVS2 form substitution"){.shaping-demo .inline-svg .greyscale-svg #mongolian-fina-fvs2}

Final FVS2 form substitution
:::

```{svg-color-toggle-button} mongolian-fina-fvs2
```


#### Stage 5, step 4: fin2 ####

This feature is not used in `<mong>` text.

#### Stage 5, step 5: fin3 ####

This feature is not used in `<mong>` text.

#### Stage 5, step 6: medi ####

The `medi` feature substitutes the default glyph for a codepoint with
the medial form of the letter.

:::{figure-md}
![Medial form substitution](/images/mongolian/mongolian-medi.svg "Medial form substitution"){.shaping-demo .inline-svg .greyscale-svg #mongolian-medi}

Medial form substitution
:::

```{svg-color-toggle-button} mongolian-medi
```


The Mongolian free-variation selectors can also be used in conjunction
with `medi` to trigger alternate forms of certain letters as required
by the orthography.

:::{figure-md}
![Medial FVS1 form substitution](/images/mongolian/mongolian-medi-fvs1.svg "Medial FVS1 form substitution"){.shaping-demo .inline-svg .greyscale-svg #mongolian-medi-fvs1}

Medial FVS1 form substitution
:::

```{svg-color-toggle-button} mongolian-medi-fvs1
```


#### Stage 5, step 7: med2 ####

This feature is not used in `<mong>` text.

#### Stage 5, step 8: init ####

The `init` feature substitutes the default glyph for a codepoint with
the initial form of the letter.

:::{figure-md}
![Initial form substitution](/images/mongolian/mongolian-init.svg "Initial form substitution"){.shaping-demo .inline-svg .greyscale-svg #mongolian-init}

Initial form substitution
:::

```{svg-color-toggle-button} mongolian-init
```


The Mongolian free-variation selectors can also be used in conjunction
with `init` to trigger alternate forms of certain letters as required
by the orthography.

:::{figure-md}
![Initial FVS1 form substitution](/images/mongolian/mongolian-init-fvs1.svg "Initial FVS1 form substitution"){.shaping-demo .inline-svg .greyscale-svg #mongolian-init-fvs1}

Initial FVS1 form substitution
:::

```{svg-color-toggle-button} mongolian-init-fvs1
```


#### Stage 5, step 9: rlig ####

The `rlig` feature substitutes glyph sequences with mandatory
ligatures. Substitutions made by `rlig` cannot be disabled by
application-level user interfaces.

:::{figure-md}
![Required ligature substitution](/images/mongolian/mongolian-rlig.svg "Required ligature substitution"){.shaping-demo .inline-svg .greyscale-svg #mongolian-rlig}

Required ligature substitution
:::

```{svg-color-toggle-button} mongolian-rlig
```


#### Stage 5, step 10: rclt ####

The `rclt` feature substitutes glyphs with contextual alternate
forms. In general, this involves replacing the default form of a
connecting glyph with an alternate that provides a preferable
connection to an adjacent glyph.

The `rclt` feature should be used to perform such substitutions that
are required by the orthography of the active script and
language. Substitutions made by `rclt` cannot be disabled by 
application-level user interfaces.

#### Stage 5, step 11: calt ####

The `calt` feature substitutes glyphs with contextual alternate
forms. In general, this involves replacing the default form of a
connecting glyph with an alternate that provides a preferable
connection to an adjacent glyph.

The `calt` feature, in contrast to `rclt` above, performs
substitutions that are not mandatory for orthographic
correctness. Accordingly, unlike `rclt`, the substitutions made by `calt`
can be disabled by application-level user interfaces.

<!--- ![Contextual alternate substitution](/images/mongolian/mongolian-calt.svg) --->


### Stage 6: Applying the typographic-form substitution features from <abbr>GSUB</abbr> ###

The typographic-substitution phase applies optional substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table.

The order in which these substitutions must be performed is fixed for
all scripts implemented in the Arabic shaping model:

    liga
	dlig
	cswh
	mset
	

#### Stage 6, step 1: liga ####

The `liga` feature substitutes standard, optional ligatures that are on
by default. Substitutions made by `liga` may be disabled by
application-level user interfaces.

<!--- ![Standard ligature substitution](/images/mongolian/mongolian-liga.svg) --->


#### Stage 6, step 2: dlig ####

The `dlig` feature substitutes additional optional ligatures that are
off by default. Substitutions made by `dlig` may be enabled or disabled by
application-level user interfaces.


#### Stage 6, step 3: cswh ####

The `cswh` feature substitutes contextual swash variants of
glyphs. 

<!--- For example, the active font might substitute a longer variant
of <samp>"Noon"</samp> when a certain number of subsequent glyphs do not descend
below the baseline. --->


#### Stage 6, step 4: mset ####

The `mset` feature performs mark positioning by substituting sequences
of bases and marks with precomposed base-and-mark glyphs.

> Note: Positioning marks with the `mark` and `mkmk` features of <abbr title="Glyph Positioning table">GPOS</abbr> is
> preferred, because `mset` can interfere with the OpenType shaping
> process. For example, substitution rules contained in `mset` may not be able to
> account for necessary mark-reordering adjustments conducted in the
> next stage.
> 
> Nevertheless, when the active font uses `mset` substitutions, the
> shaping engine must deal with the situation gracefully.

### Stage 7: Applying the positioning features from <abbr>GPOS</abbr> ###

The positioning stage adjusts the positions of mark and base
glyphs.

The order in which these features are applied is fixed for
all scripts implemented in the Arabic shaping model:

    curs
	kern
	mark
	mkmk

#### Stage 7, step 1: curs ####

The `curs` feature performs cursive positioning. Each glyph has an
entry point and exit point; the `curs` feature positions glyphs so
that the entry point of the current glyph meets the exit point of the
preceding glyph.

<!--- ![Cursive positioning](/images/mongolian/mongolian-curs.svg) --->


#### Stage 7, step 2: kern ####

The `kern` feature adjusts glyph spacing between pairs of adjacent glyphs.


#### Stage 7, step 3: mark ####

The `mark` feature positions marks with respect to base glyphs.

<!--- ![Mark positioning](/images/mongolian/mongolian-mark.svg) --->


#### Stage 7, step 4: mkmk ####

The `mkmk` feature positions marks with respect to preceding marks,
providing proper positioning for sequences of marks that attach to the
same base glyph.


================================================
FILE: opentype-shaping-myanmar.md
================================================
```{include} /_global.md
```

# Myanmar shaping in OpenType #

This document details the shaping procedure needed to display text
runs in the Myanmar script.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes and subclasses](#shaping-classes-and-subclasses)
      - [Myanmar character tables](#myanmar-character-tables)
  - [The `<mym2>` shaping model](#the-mym2-shaping-model)
      - [Stage 1: Identifying syllables and other sequences](#stage-1-identifying-syllables-and-other-sequences)
      - [Stage 2: Initial reordering](#stage-2-initial-reordering)
      - [Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr>](#stage-3-applying-the-basic-substitution-features-from-gsub)
      - [Stage 4: Applying all remaining substitution features from <abbr>GSUB</abbr>](#stage-4-applying-all-remaining-substitution-features-from-gsub)
      - [Stage 5: Applying remaining positioning features from <abbr>GPOS</abbr>](#stage-5-applying-remaining-positioning-features-from-gpos)
  - [The `<mymr>` shaping model](#the-mymr-shaping-model)


## General information ##

The Myanmar or Burmese script is a descendant of the Brahmi script, and follows
many of the same general patterns found in [Indic
scripts](opentype-shaping-indic-general.md). However, Myanmar
incorporates enough distinctions of its own that it is generally not
advisable to attempt supporting it in a general-purpose Indic shaping
engine. 

For example, Myanmar script includes a "Reph"-like feature known as
"Kinzi" although, unlike "Reph", a "Kinzi" may be formed by any of
several initial consonants. Also, notably, real-world texts written in
Myanmar script often do not use inter-word spaces, which may make the
process of syllable identification substantially different from
processing Indic scripts.

The Myanmar script is used to write multiple languages, most commonly
Burmese, Mon, Karen, Kayah, Shan, Palaung, and Pali. In addition,
Sanskrit may be written in Myanmar, so Myanmar script runs may include 
glyphs from the Vedic Extensions block of Unicode. 

There are two extant Myanmar script tags defined in OpenType, `<mymr>`
and `<mym2>`. The older script tag, `<mymr>`, was deprecated in 2005.
Therefore, new fonts should be engineered to work with the `<mym2>`
shaping model. However, if a font is encountered that supports only
`<mymr>`, the shaping engine should deal with it gracefully.

## Terminology ##

OpenType shaping uses a standard set of terms for Brahmi-derived and
Indic scripts.  The terms used colloquially in any particular language
may vary, however, potentially causing confusion.

**Matra** is the standard term for a dependent vowel sign. Syllables
in Myanmar script can include sequences of multiple vowels and,
therefore, multiple matras.

**Halant** and **Virama** are both standard terms for the below-base
"vowel-killer" mark. Unicode documents use the term "virama" most
frequently, while OpenType documents use the term "halant" most
frequently.

**Asat** is the term for the "pure killer" character in Myanmar. An
asat after a consonant serves a similar function as a halant by
suppressing the inherent vowel of the consonant, but the asat is
rendered visually, either as an above-base mark or in a substitution
form triggered by an adjacent codepoint.

An asat may be placed following a consonant to denote that the
consonant is doubled. An asat may also be followed by a halant, a
sequence that is used to trigger the "Kinzi" special form.

**Chandrabindu** (or simply **Bindu**) is the standard term for the
diacritical mark indicating that the preceding vowel should be
nasalized. Myanmar script does not use a chandrabindu; however, the
_BINDU_ category is used for other marks during the
syllable-identification stage in order to maintain compatibility with
other scripts. 

**Tone markers** are an important part of languages written in Myanmar
script. These markers may be either spacing-combining (`[Mc]`) or
non-spacing (`[Mn]`). Several tone markers may be used within a single
syllable.

The term **base consonant** is also critical to Myanmar shaping. The
base consonant of a syllable is the consonant that carries the
syllable's vowel sound, either the inherent vowel (for an unmarked
base consonant) or a dependent vowel (with the addition of a matra). 

A syllable's base consonant is generally rendered in its full form
(although it may form ligatures), while other consonants in the
syllable frequently take on secondary forms. Different <abbr title="Glyph Substitution table">GSUB</abbr>
substitutions may apply to a script's **pre-base** and **post-base**
consonants. Some of these substitutions create **above-base** or
**below-base** forms. The **Kinzi** form of certain consonants is an
example, akin to the "Reph" form of "Ra" in many Indic scripts.

Many Myanmar letters may be followed by a **Variation Selector**
codepoint in order to request the **dotted form** of the corresponding
glyph, which is preferred for some languages written with Myanmar
script. Fonts are not required to include the dotted-form variants;
when they are absent from the active font, the default form of the
corresponding letter will be used instead.

:::{figure-md}
![Dotted form substitution with variation selector](images/myanmar/myanmar-dotted.svg "Dotted form substitution with variation selector"){.shaping-demo .inline-svg .greyscale-svg #myanmar-dotted}

Dotted form substitution with variation selector
:::

```{svg-color-toggle-button} myanmar-dotted
```


Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.

## Glyph classification ##

Shaping Myanmar text depends on the shaping engine correctly
classifying each glyph in the run. As with most other scripts, the
classifications must distinguish between consonants, vowels
(independent and dependent), numerals, punctuation, and various types
of diacritical mark. 

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not sufficient to fully capture the
expected shaping behavior (such as glyph reordering). Therefore,
Myanmar glyphs must additionally be classified by how they are treated
when shaping a run of text.

### Shaping classes and subclasses ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Myanmar script. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Several of the diacritic and syllable-modifying marks behave according
to their own rules and, thus, have a special class. These include
`BINDU` and `VISARGA`. Some less-common marks behave according to
rules that are similar to these common marks, and are therefore
classified with the corresponding common mark. The Vedic Extensions
also include a `CANTILLATION` class for tone marks.

Myanmar's "halant" codepoint is classified as `INVISIBLE_STACKER`,
rather than the more common `VIRAMA`. This is to indicate that, unlike
the "halant"/"virama" characters in several other scripts, the Myanmar
"halant" is never rendered visually as a glyph.

Myanmar's "Asat" codepoint, however, is rendered visually when it
appears in a syllable. The "Asat" behaves differently than Indic
"halant", however. It can be used to kill a consonant's inherent vowel
sound, but it is not used between consonants to indicate the formation
of a conjunct or a subjoined form. The "Asat" is classified as
`PURE_KILLER`.

Letters generally fall into the classes `CONSONANT`,
`VOWEL_INDEPENDENT`, and `VOWEL_DEPENDENT`. These classes help the
shaping engine parse and identify key positions in a syllable. For
example, Unicode categorizes dependent vowels as `Mark [Mn]`, but the
shaping engine must be able to distinguish between dependent vowels
and diacritical marks (which are also categorized as `Mark [Mn]`).

Myanmar uses one subclass of consonant, `CONSONANT_MEDIAL`. This
subclass is used for special non-base variants of several consonants that
serve to modify the syllable's vowel sound. These medial consonants
are rendered as non-spacing marks attached to the base consonant.

> Note: The medial "Ra" is reordered to pre-base-consonant
> position. The other medial consonants do not require reordering.

> Note: The medial consonants are encoded in separate codepoints,
> distinguishing them from the standard (non-medial) variant of the
> corresponding consonant. 

In addition, the Myanmar and Myanmar Extended Unicode blocks include
several codepoints classified as `CONSONANT_PLACEHOLDER`. These
codepoints are used in verbal transcriptions to take tone
marks. However, these glyphs are not consonants in the true sense and
are unlikely to occur within normal words.

Other characters, such as symbols, need no special
attention from the shaping engine, so they are not assigned a shaping
class.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType features that
might affect how the respective glyphs are drawn, such as `tnum`,
which specifies the usage of tabular-width numerals, and `sups`, which
replaces the default glyphs with superscript variants.

Marks and dependent vowels are further labeled with a mark-placement
subclass, which indicates where the glyph will be placed with respect
to the base character to which it is attached. The actual position of
the glyphs is determined by the lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr>
table; however, the shaping rules for Indic scripts require that the
shaping engine be able to identify marks by their general
position. 

For example, left-side dependent vowels (matras), classified
with `LEFT_POSITION`, must frequently be reordered. Therefore, the
`LEFT_POSITION` subclass of the character must be tracked throughout
the shaping process.

There are four basic _mark-placement subclasses_ for dependent vowels
(matras). Each corresponds to the visual position of the matra with
respect to the base consonant to which it is attached:

  - `LEFT_POSITION` matras are positioned to the left of the base consonant.
  - `RIGHT_POSITION` matras are positioned to the right of the base consonant.
  - `TOP_POSITION` matras are positioned above the base consonant.
  - `BOTTOM_POSITION` matras are positioned below the base consonant.
  
These positions may also be referred to elsewhere in shaping documents as:

  - _Pre-base_ matras
  - _Post-base_ matras
  - _Above-base_ matras
  - _Below-base_ matras
  
respectively. The `LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there are some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 

Many Myanmar letters may be followed by a `Variation Selector`
codepoint in order to request the "dotted form" of the
corresponding glyph. These variations are defined in Unicode's
`Standardized Variants` document; only the codepoints listed in that
document support substitution via variation selectors. At present,
only "Variation Selector 1" (`U+FE00`) is used with Myanmar.

If the active font does not include glyphs representing the requested
variant of the letter preceding the variation selector, then the
shaping engine must treat the variation selector codepoint as
invisible and ignorable and use the default version of the preceding
letter. 


### Myanmar character tables ###

Separate character tables are provided for the Myanmar and Vedic
Extensions blocks as well as for other miscellaneous characters that
are used in `<mym2>` text runs:

  - [Myanmar character table](character-tables/character-tables-myanmar.md#myanmar-character-table)
  - [Myanmar Extended-A character table](character-tables/character-tables-myanmar.md#myanmar-extended-a-character-table)
  - [Myanmar Extended-B character table](character-tables/character-tables-myanmar.md#myanmar-extended-b-character-table)
  - [Myanmar Extended-C character table](character-tables/character-tables-myanmar.md#myanmar-extended-c-character-table)
  - [Vedic Extensions character table](character-tables/character-tables-myanmar.md#vedic-extensions-character-table)
  - [Miscellaneous character table](character-tables/character-tables-myanmar.md#miscellaneous-character-table)

The tables list each codepoint along with its Unicode general
category, its shaping class, and its mark-placement subclass. The
codepoint's Unicode name and an example glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1000`   | Letter           | CONSONANT         | _null_                     | &#x1000; Ka                  |
| | | | |
|`U+1036`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x1036; Anusvara            |
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine.

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the tables use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.

Other important characters that may be encountered when shaping runs
of Myanmar text include the dotted-circle placeholder (`U+25CC`), 
the no-break space (`U+00A0`), and the zero-width space (`U+200B`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

<!--- The zero-width joiner is primarily used to prevent the formation of a
subjoining form from a <samp>"_Consonant_,Halant,_Consonant_"</samp> sequence. The sequence
<samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> blocks the substitution of a
subjoined form for the second consonant. --->

<!---
A secondary usage of the zero-width joiner is to prevent the formation of
<samp>"Reph"</samp>. An initial <samp>"Ra,Halant,ZWJ"</samp> sequence should not produce a <samp>"Reph"</samp>,
where an initial <samp>"Ra,Halant"</samp> sequence without the zero-width joiner
otherwise would.
--->

The no-break space (<abbr>NBSP</abbr>) is primarily used to display
those codepoints that are defined as non-spacing (marks, dependent
vowels (matras), below-base consonant forms, and post-base consonant
forms) in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match <samp>"NBSP,ZWJ,Halant,_Consonant_"</samp>, <samp>"NBSP,_mark_"</samp>, or
<samp>"NBSP,_matra_"</samp>.

The zero-width space may be used between words — even though no visual
word spacing results — in order to indicate word breaks within a text
that can be used by line-breaking algorithms in a higher-level
typesetting environment.


## The `<mym2>` shaping model ##

Processing a run of `<mym2>` text involves five top-level stages:

1. Identifying syllables and other sequences
2. Initial reordering
3. Applying the basic substitution features from <abbr>GSUB</abbr>
4. Applying all remaining substitution features from <abbr>GSUB</abbr>
5. Applying all remaining positioning features from <abbr>GPOS</abbr>


As with other Brahmi-derived and Indic scripts, the initial reordering
stage and the final reordering stage each involve applying a set of several
script-specific rules. The basic substitution features must be applied
to the run in a specific order. The remaining substitution features in
stage four, however, do not have a mandatory order.


Myanmar exhibits many of the same shaping patterns found in Indic
scripts, but it differs in a few critical characteristics. With regard
to these common variations, Myanmar's specific shaping 
characteristics include:


  - The first consonant of a syllable is always the base consonant,
    excluding a consonant that is part of an initial <samp>"Kinzi"</samp>-forming
    sequence (if it is present).

> Note: For comparison with the General Indic shaping model, this
> characteristic would correspond to `BASE_POS_FIRST`.
  
  - <samp>"Kinzi"</samp> is always encoded as a syllable-initial sequence, but it
    is reordered. The final position of <samp>"Kinzi"</samp> is immediately after
    the base consonant. 

> Note: For comparison with the General Indic shaping model, the
> Kinzi-encoding characteristic would correspond to `REPH_MODE_EXPLICIT`,
> and the reordering characteristic would correspond to `POS_AFTER_MAIN`.
  
  - The below-base forms feature is applied only to consonants
    after the base consonant. 

> Note: For comparison with the General Indic shaping model, this
> characteristic would correspond to `BLWF_MODE_POST_ONLY`.

  - Medial Ra is reordered to pre-base position.

  - Pre-base matras are reordered to the beginning of the
    syllable. Multiple pre-base matras can occur; any such sequences
    must be moved together, as a block, at the reordering stage.

> Note: For comparison with the General Indic shaping model, this
> characteristic is distinct to Myanmar script. Indic scripts apply 
> different reordering rules to pre-base matras that depend on the
> contents of the syllable.
	
  - The ordering position for right-side and above-base matras is the
    same. All are reordered to immediately after all subjoined consonants.
	
  - Below-base matras are reordered to immediately before any
    right-side and above-base matras.
    	
> Note: For comparison with the General Indic shaping model, this
> characteristic would correspond to `MATRA_POS_TOP` and
> `MATRA_POS_RIGHT` taking the ordering position 
> `POS_AFTER_SUBJOINED`, and `MATRA_POS_BOTTOM` taking the ordering
> position `POS_BELOWBASE_CONSONANT`. 


### Stage 1: Identifying syllables and other sequences ###

A syllable in Myanmar consists of a valid orthographic sequence
that may be followed by a "tail" of modifier signs. 

> Note: The Myanmar Unicode block enumerates two modifier signs,
> "Anusvara" (`U+1036`) and "Visarga" (`U+1038`). There are also
> twenty-one tone markers in the Myanmar and Myanmar Extended-A
> blocks. In addition, Sanskrit text written in Myanmar may include
> additional signs from the Vedic Extensions block.

Because texts written in Myanmar script do not generally employ
inter-word spaces, however, shaping engines must rely on
syllable-identification algorithms to recognize word-boundary
patterns — distinguishing numeric sequences, symbols, punctuation, and other
miscellaneous script characters from syllables within words.

Each syllable contains exactly one vowel sound. Valid syllables may
begin with either a consonant or an independent vowel. 

If the syllable begins with a consonant, then the consonant that
provides the vowel sound is referred to as the "base" consonant. If
the syllable begins with an independent vowel, that vowel is the
syllable's only vowel sound and, by definition, there is no "base"
consonant. 

> Note: A consonant that is not accompanied by a dependent vowel (matra) sign
> carries the script's inherent vowel sound. This vowel sound is changed
> by a dependent vowel (matra) sign following the consonant.

Generally speaking, the base consonant is the first consonant of the
syllable and its vowel sound designates the end of the syllable. The
exception to this rule is consonants that are part of a
<samp>"Kinzi"</samp>-triggering sequence.

Post-base consonants in a valid syllable will be preceded by <samp>"Halant"</samp>
marks. 

	BaseC Halant Post-baseC
	
The algorithm for correctly identifying the base consonant includes a
test to recognize these sequences and not mis-identify the base
consonant.

Medial consonants, if they occur, will not be preceded by a
<samp>"Halant"</samp>. This is because medial consonants in Myanmar are used to
modify the vowel sound of the syllable.

> Note: in the Myanmar script, all medial consonants have their own
> distinct codepoints. Therefore, they can be identified by codepoint
> alone, and there is no need for a text run to identify them using
> any special sequences.


As with other Brahmi-derived and Indic scripts, the consonant <samp>"Ra"</samp> receives
special treatment. 

  - A <samp>"Medial Ra"</samp> (`U+103C`) must be reordered to a position immediately
    before the syllable's base consonant. 
	
	Note, however, that <samp>"Medial Ra"</samp> is a separate codepoint from the
    standard <samp>"Ra"</samp> (`U+101B`). 
	
  - A syllable-initial <samp>"Ra"</samp> may also be part of a <samp>"Kinzi"</samp>-triggering
    sequence. 
	
	Notably, however, although <samp>"Ra"</samp> alone will take on the <samp>"Reph"</samp> form
    in Indic script sequences, the Myanmar script's <samp>"Kinzi"</samp> feature
    can be triggered for three consonants, depending on the language
    in use: <samp>"Ra"</samp> (`U+101B`), <samp>"Nga"</samp> (`U+1004`), and <samp>"Mon Nga"</samp>
    (`U+105A`). In each case, the <samp>"Kinzi"</samp> form is triggered by an
    explicit sequence: <samp>"_consonant_,Asat,Halant"</samp>.
	
	There are, therefore, exactly three <samp>"Kinzi"</samp>-forming sequences to
    test for:
	  - <samp>"Ra,Asat,Halant"</samp>
	  - <samp>"Nga,Asat,Halant"</samp>
	  - <samp>"Mon Nga,Asat,Halant"</samp>

:::{figure-md}
![Ra Kinzi](images/myanmar/myanmar-kinzi-ra.svg "Ra Kinzi"){.shaping-demo .inline-svg .greyscale-svg #myanmar-kinzi-ra}

Ra Kinzi
:::

```{svg-color-toggle-button} myanmar-kinzi-ra
```


:::{figure-md}
![Nga Kinzi](images/myanmar/myanmar-kinzi-nga.svg "Nga Kinzi"){.shaping-demo .inline-svg .greyscale-svg #myanmar-kinzi-nga}

Nga Kinzi
:::

```{svg-color-toggle-button} myanmar-kinzi-nga
```


:::{figure-md}
![Mon Nga Kinzi](images/myanmar/myanmar-kinzi-monnga.svg "Mon Nga Kinzi"){.shaping-demo .inline-svg .greyscale-svg #myanmar-kinzi-monnga}

Mon Nga Kinzi
:::

```{svg-color-toggle-button} myanmar-kinzi-monnga
```


In the Myanmar (or Burmese) language, <samp>"Nga"</samp> is the only <samp>"Kinzi"</samp>-forming
consonant. <samp>"Mon Nga"</samp> can form a <samp>"Kinzi"</samp> in the Mon language, and <samp>"Ra"</samp>
can form a <samp>"Kinzi"</samp> in Sanskrit written with the Myanmar script.

In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.

> Note: Foreign loanwords, when written in the Myanmar script, may
> not adhere to the syllable-formation rules described above. 


Syllables should be identified by examining the run and matching
glyphs, based on their categorization, using regular expressions. 

The following regular expressions can be used to match Myanmar-script
syllables. 

The regular expressions utilize the shaping classes from the tables
above. For the purpose of syllable identification, more general
classes can be used, as defined in the following table. This
simplifies the resulting expressions. 

```markdown
_ra_		= "Ra" | "Nga" | "Mon Nga"
_consonant_ 	= `CONSONANT` | `CONSONANT_PLACEHOLDER` - _ra_
_vowel_		= `VOWEL_INDEPENDENT`
_halant_	= `INVISIBLE_STACKER`
_asat_		= "Asat"
_a_		= "Anusvara" | "Sign Ai"
_db_		= "Dot Below"
_zwj_		= `JOINER`
_zwnj_		= `NON_JOINER`
_mh_		= "Medial Ha" | "Mon Medial La"
_mr_		= "Medial Ra"
_mw_		= "Medial Wa" | "Shan Medial Wa"
_my_		= "Medial Ya" | "Mon Medial Na" | "Mon Medial Ma"
_d_		= `NUMBER`
_pt_		= "Tone Sgaw Karen Hathi" | "Tone Sgaw Karen Ke Pho" |
	          "Western Pwo Karen Tone 1" | "Western Pwo Karen Tone
	          2" | "Western Pwo Karen Tone 3" | "Western Pwo Karen
	          Tone 4" | "Western Pwo Karen Tone 5" | "Pao Karen
	          Tone" 
_sm_		= "Visarga" | "Shan Tone 2" | "Shan Tone 3" | "Shan
	          Tone 5" | "Shan Tone 6" | "Shan Council Tone 2" |
	          "Shan Council Tone 3" | "Shan Council Emphatic Tone"
	          | "Rumai Palaung Tone 5" | "Khamti Tone 1" | "Khamti
	          Tone 3" | "Aiton A" 
_punc_		= "Little Section" | "Section"
_matrapre_	= `MATRA` & `LEFT_POSITION`
_matrapost_	= `MATRA` & `RIGHT_POSITION`
_matraabove_	= `MATRA` & `TOP_POSITION` - _a_
_matrabelow_	= `MATRA` & `BOTTOM_POSITION`
_gb_		= U+002D | U+00A0 | U+00D7 | U+2012 | U+2013 | U+2014 |
              U+2015 | U+2022 | U+25CC | U+25FB | U+25FC | U+25FD |
			  U+25FE 
_cs_		= `CONSONANT_WITH_STACKER`
_v_		= `VISARGA`
_vs_		= "Variation Selector"
```

> Note: the _ra_ identification class is mutually exclusive with 
> the _consonant_ class. The union of the _consonant_ and _ra_ classes
> is used in the regular expression elements below in order to
> correctly identify <samp>"Ra"</samp>, <samp>"Nga"</samp>, and <samp>"Mon Nga"</samp> characters that do not
> trigger <samp>"Kinzi"</samp> forms. 
>
> Note, also, that the `CONSONANT_PLACEHOLDER` class is unioned with
> the `CONSONANT` class for the purpose of syllable identification,
> even though these two classes are treated separately in general.
>
> Note: The _mh_, _mw_, and _my_ identification classes include
> several medial letters from the non-Burmese languages; they are
> grouped according to the medial consonants in Burmese that are the
> closest match in terms of shaping behavior.
>
> Note: <samp>"Sign Ai"</samp> is classified as _a_, not as _matraabove_, in order
> to implement orthographically correct behavior.
>
> Note: the _gb_ identification class includes several "generic base"
> codepoints that are often used in real-world text runs to act as
> placeholders for missing letters.

> Note: the tone marker codepoints are divided up between two
> identification classes, reflecting the differing orthographic rules
> they follow. The _pt_ identification class constitutes the "Pwo
> tone" markers, while the _sm_ identification class includes the
> remaining tone markers and other syllable modifiers.


These identification classes form the basis of the following regular
expression elements:

```markdown
C	= _consonant_ | _ra_
Z	= _zwj_ | _zwnj_
K	= _ra_ _asat_ _halant_
Med	= _my_? _mr_? _mw_? _mh_? _asat_?
Vmain	= _matrapre_* _matraabove_* _matrabelow_* _a_* (_db_ _asat_?)?
Vpost	= _matrapost_ _mh_? _asat_* _matraabove_* _a_* (_db_ _asat_?)?
Pwo	= _pt_ _a_* _db_? _asat_?
Tcomplex= _asat_* Med Vmain Vpost* Pwo* _v_* Z?
Tail	= _halant_ | Tcomplex
```

Using the above elements, the following regular expressions define the
possible syllable types:

A consonant-based syllable will match the expression:
```markdown
(K | _cs_)? (C | _vowel_ | _d_ | _gb_) _vs_? (_halant_ (C | _vowel_) _vs_?)* Tail
```

The expressions above use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


A sequence that does not match any of these expressions should be
regarded as broken. The shaping engine may make a best-effort attempt
to shape the broken sequence, but making guarantees about the
correctness or appearance of the final result is out of scope for this
document.

After the syllables have been identified, each of the subsequent 
shaping stages occurs on a per-syllable basis.


### Stage 2: Initial reordering ###

The initial reordering stage is used to relocate glyphs from the
phonetic order in which they occur in a run of text to the
orthographic order in which they are presented visually.

> Note: Primarily, this means moving dependent-vowel (matra) glyphs, 
> <samp>"Kinzi"</samp>-forming sequences, and pre-base-reordering medial consonants.
>
> These reordering moves are mandatory. The final-reordering stage
> may make additional moves, depending on the text and on the features
> implemented in the active font.

The syllable should be processed by tagging each glyph with its
intended position based on its ordering category. After all glyphs
have been tagged, the entire syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.

The final sort order of the ordering categories should be:


<!---	POS_RA_TO_BECOME_REPH --->


	POS_PREBASE_MATRA
	
	POS_PREBASE_CONSONANT

	POS_BASE_CONSONANT
	POS_AFTER_MAIN

	POS_BEFORE_SUBJOINED
	POS_BELOWBASE_CONSONANT
	POS_AFTER_SUBJOINED

<!---	POS_BEFORE_POST
	POS_POSTBASE_CONSONANT
	POS_AFTER_POST --->

<!---	POS_FINAL_CONSONANT --->
<!---	POS_SMVD --->

<!--- question: does Myanmar shape handle Vedic signs differently? --->
<!--- or am I looking at an incomplete version of the reordering --->
<!--- logic? --->
<!--- Perhaps SMVD is all just tagged as _POS_AFTER_SUBJOINED --->
<!--- and captures all tone marks, too? --->

This sort order enumerates all of the possible final positions to
which a codepoint might be reordered in Myanmar script. 

The position names mimic those used in the General Indic shaping
model, for ease of implementation. However, shaping engines are free
to use any naming scheme they choose.

The basic positions (left to right) are dependent
vowels (matras) and consonants positioned before the base
consonant (`POS_PREBASE_MATRA` and `POS_PREBASE_CONSONANT`), the base
consonant (`POS_BASE_CONSONANT`), below-base consonants
(`POS_BELOWBASE_CONSONANT`),
and syllable-modifying or Vedic signs (`POS_SMVD`).

In addition, several secondary positions are defined to handle various
reordering rules that deal with relative, rather than absolute,
positioning. `POS_AFTER_MAIN` means that a character must be
positioned immediately after the base consonant. `POS_BEFORE_SUBJOINED`
and `POS_AFTER_SUBJOINED` mean that a character must be positioned
before or after any below-base consonants, respectively. 

For shaping-engine implementers, the names used for the ordering
categories matter only in that they are unambiguous. 

For a definition of the "base" consonant, refer to stage 2, step 1, which follows.


#### Stage 2, step 1: Base consonant ####

The first step is to determine the base consonant of the syllable, if
there is one, and tag it as `POS_BASE_CONSONANT`.

The base consonant is defined as the consonant in a consonant-based
syllable that carries the syllable's vowel sound. That vowel sound
will either be provided by the script's inherent vowel (in which case
it is not written with a separate character) or the sound will be designated
by the addition of a dependent-vowel (matra) sign.

Vowel-based syllables, standalone sequences, and broken text runs will
not have base consonants.

The algorithm for determining the base consonant is:

  - Starting from the beginning of the syllable, move forwards until a
    `CONSONANT` is found. 
      * If the consonant is part of a <samp>"Kinzi"</samp> sequence, move to the
        next consonant. 
  - The consonant stopped at will be the base consonant.

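> Note: The algorithm considers only `CONSONANT` class consonants;
> medial consonants (of class `CONSONANT_MEDIAL`) are non-base
> variants and cannot serve as the base consonant.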


#### Stage 2, step 2: Tag matras ####

Second, all left-side dependent-vowel (matra) signs must be tagged to be
moved to the beginning of the syllable, with `POS_PREBASE_MATRA`.

All right-side and above-base dependent-vowel (matra)
signs are tagged `POS_AFTER_SUBJOINED`.

All below-base dependent-vowel (matra) signs are tagged
`POS_BELOWBASE_CONSONANT`. 

For simplicity, shaping engines may choose to tag matras
in an earlier text-processing step, using the information in the
_Mark-placement subclass_ column of the character tables. It is
critical at this step, however, that all matras are correctly tagged
before proceeding to the next step. 

#### Stage 2, step 3: Anusvara ####

Third, any `ANUSVARA` marks appearing immediately after a below-base
vowel sign must be tagged with `POS_BEFORE_SUBJOINED`, so that the
marks are reordered to a position immediately before the below-base
vowel signs.


#### Stage 2, step 4: Pre-base-reordering consonants ####

Fourth, all pre-base-reordering consonants must be tagged with
`POS_PREBASE_CONSONANT`. 

Myanmar has one pre-base-reordering consonant: <samp>"Medial Ra"</samp>.

:::{figure-md}
![Pre-base-reordering Medial Ra](images/myanmar/myanmar-medial-ra.svg "Pre-base-reordering Medial Ra"){.shaping-demo .inline-svg .greyscale-svg #myanmar-medial-ra}

Pre-base-reordering Medial Ra
:::

```{svg-color-toggle-button} myanmar-medial-ra
```


#### Stage 2, step 5: Kinzi ####

Fifth, initial <samp>"Kinzi"</samp>-triggering sequences that will become <samp>"Kinzi"</samp>s
must be tagged with `POS_AFTER_MAIN`.

The sequences are:

  - <samp>"Ra,Asat,Halant"</samp>
  - <samp>"Nga,Asat,Halant"</samp>
  - <samp>"Mon Nga,Asat,Halant"</samp>

In the Myanmar (or Burmese) language, <samp>"Nga"</samp> is the only <samp>"Kinzi"</samp>-forming
consonant. <samp>"Mon Nga"</samp> can form a <samp>"Kinzi"</samp> in the Mon language, and <samp>"Ra"</samp>
can form a <samp>"Kinzi"</samp> in Sanskrit written with the Myanmar script.


#### Stage 2, step 6: Post-base consonants ####

Sixth, any remaining non-base consonants that occur after the base
consonant must be tagged with `POS_AFTER_MAIN`. Full consonants (of
class `CONSONANT`) will be preceded by a <samp>"Halant"</samp> glyph. Medial
consonants (of class `CONSONANT_MEDIAL`) will not be preceded by a
<samp>"Halant"</samp> glyph. 

> Note: <samp>"Medial Ra"</samp> should have been tagged with
> `POS_PREBASE_CONSONANT` in stage 2, step 4, and must not be
> re-tagged in this step.


#### Stage 2, step 7: Mark tagging ####

<!--- not sure this is done!!! --->

Seventh, all marks must be tagged with the same positioning tag as the
closest non-mark character the mark has affinity with, so that they move together
during the sorting step.

For all marks preceding the base consonant, the mark must be tagged
with the same positioning tag as the closest preceding non-mark
consonant.

For all marks occurring after the base consonant, the mark must be
tagged with the same positioning tag as the closest subsequent consonant.

> Note: In this step, joiner and non-joiner characters must also be
> tagged according to the same rules given for marks, even though
> these characters are not categorized as marks in Unicode.


With these steps completed, the syllable can be sorted into the final sort order.


### Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

The order in which these substitutions must be performed is fixed:

	locl
	ccmp
	rphf 
	pref 
	blwf 
	pstf


#### Stage 3, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

:::{figure-md}
![Local-forms substitution](/images/myanmar/myanmar-locl.svg "Local-forms substitution"){.shaping-demo .inline-svg .greyscale-svg #myanmar-locl}

Local-forms substitution
:::

```{svg-color-toggle-button} myanmar-locl
```


#### Stage 3, step 2: ccmp ####

The `ccmp` feature allows a font to substitute mark-and-base sequences
with a pre-composed glyph including the mark and the base, or to
substitute a single glyph into an equivalent decomposed sequence of glyphs. 
 
If present, these composition and decomposition substitutions must be
performed before applying any other <abbr title="Glyph Substitution table">GSUB</abbr> lookups, because
those lookups may be written to match only the `ccmp`-substituted
glyphs. 

> Note: `ccmp` usage is uncommon in Myanmar fonts. Nevertheless,
> shaping engines must apply any `ccmp` substitutions if they are
> present in the active font.


#### Stage 3, step 3: rphf ####

The `rphf` feature replaces initial <samp>"Kinzi"</samp>-triggering sequences with
the <samp>"Kinzi"</samp> glyph. The sequences are:

  - <samp>"Ra,Asat,Halant"</samp>
  - <samp>"Nga,Asat,Halant"</samp>
  - <samp>"Mon Nga,Asat,Halant"</samp>

In the Myanmar (or Burmese) language, <samp>"Nga"</samp> is the only <samp>"Kinzi"</samp>-forming
consonant. <samp>"Mon Nga"</samp> can form a <samp>"Kinzi"</samp> in the Mon language, and <samp>"Ra"</samp>
can form a <samp>"Kinzi"</samp> in Sanskrit written with the Myanmar script.

:::{figure-md}
![Kinzi composition](/images/myanmar/myanmar-kinzi-nga-1.svg "Kinzi composition"){.shaping-demo .inline-svg .greyscale-svg #myanmar-kinzi-nga-1}

Kinzi composition
:::

```{svg-color-toggle-button} myanmar-kinzi-nga-1
```


#### Stage 3, step 4: pref ####

The `pref` feature replaces pre-base-consonant glyphs with
any special forms. In Myanmar, this can include variant forms for
<samp>"Medial Ra"</samp> or for the left-side matras <samp>"Sign E"</samp> (`U+1031`) or <samp>"Shan
Sign E"</samp> (`U+1084`).

:::{figure-md}
![pref feature application](/images/myanmar/myanmar-pref.svg "pref feature application"){.shaping-demo .inline-svg .greyscale-svg #myanmar-pref}

pref feature application
:::

```{svg-color-toggle-button} myanmar-pref
```


#### Stage 3, step 5: blwf ####

The `blwf` feature replaces below-base-consonant glyphs with any
special forms. In Myanmar, this usually means replacing
post-base-consonant <samp>"Halant,_Consonant_"</samp> sequences with subjoined
forms of the consonant. 

However, Myanmar includes several other below-base-consonant
forms, including medial consonants and below-base dependent vowel
(matra) signs.

The below-base forms feature is applied only to glyphs occurring after
the base consonant. 

:::{figure-md}
![blwf feature application](/images/myanmar/myanmar-blwf.svg "blwf feature application"){.shaping-demo .inline-svg .greyscale-svg #myanmar-blwf}

blwf feature application
:::

```{svg-color-toggle-button} myanmar-blwf
```


#### Stage 3, step 6: pstf ####

The `pstf` feature replaces post-base-consonant glyphs with any
special forms. 

> Note: `pstf` usage is uncommon in Myanmar fonts, because the script
> does not employ special post-base forms of consonants. Nevertheless,
> shaping engines should apply any `pstf` substitutions if they are
> present in the active font.


### Stage 4: Applying all remaining substitution features from <abbr>GSUB</abbr> ###

In this stage, the remaining substitution features from the <abbr title="Glyph Substitution table">GSUB</abbr> table
are applied. The order in which these features are applied is not
canonical; they should be applied in the order in which they appear in
the <abbr title="Glyph Substitution table">GSUB</abbr> table in the font. 

	pres
	abvs
	blws
	psts
	liga


The `pres` feature replaces pre-base-consonant glyphs with special
presentation forms. In Myanmar, this can include stylistic variants
of left-side dependent vowels (matras) or of <samp>"Medial Ra"</samp>. 

:::{figure-md}
![Application of the pres feature](/images/myanmar/myanmar-pres.svg "Application of the pres feature"){.shaping-demo .inline-svg .greyscale-svg #myanmar-pres}

Application of the pres feature
:::

```{svg-color-toggle-button} myanmar-pres
```


The `abvs` feature replaces above-base-consonant glyphs with special
presentation forms. This usually includes contextual variants of
above-base marks or contextually appropriate mark-and-base ligatures.

:::{figure-md}
![Application of the abvs feature](/images/myanmar/myanmar-abvs.svg "Application of the abvs feature"){.shaping-demo .inline-svg .greyscale-svg #myanmar-abvs}

Application of the abvs feature
:::

```{svg-color-toggle-button} myanmar-abvs
```


The `blws` feature replaces below-base-consonant glyphs with special
presentation forms. In Myanmar, this can include contextual ligatures
involving below-base dependent vowel marks (matras), medial
consonants, or subjoined consonants.

:::{figure-md}
![Application of the blws feature](/images/myanmar/myanmar-blws.svg "Application of the blws feature"){.shaping-demo .inline-svg .greyscale-svg #myanmar-blws}

Application of the blws feature
:::

```{svg-color-toggle-button} myanmar-blws
```


The `psts` feature replaces post-base-consonant glyphs with special
presentation forms. This usually includes replacing right-side
dependent vowels (matras) with stylistic variants.


:::{figure-md}
![Application of the psts feature](/images/myanmar/myanmar-psts.svg "Application of the psts feature"){.shaping-demo .inline-svg .greyscale-svg #myanmar-psts}

Application of the psts feature
:::

```{svg-color-toggle-button} myanmar-psts
```


The `liga` feature substitutes standard, optional ligatures that are on
by default. Substitutions made by `liga` may be disabled by
application-level user interfaces.

:::{figure-md}
![Application of the liga feature](/images/myanmar/myanmar-liga.svg "Application of the liga feature"){.shaping-demo .inline-svg .greyscale-svg #myanmar-liga}

Application of the liga feature
:::

```{svg-color-toggle-button} myanmar-liga
```


### Stage 5: Applying remaining positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied. As with the preceding stage, the order in which these
features are applied is not canonical; they should be applied in the
order in which they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

	dist
	abvm
	blwm
	mark
	mkmk

> Note: The `kern` feature is usually applied at this stage, if it is
> present in the font. However, `kern` is not mandatory for shaping
> Myanmar text and may be disabled by user preference.

The `dist` feature adjusts the horizontal positioning of
glyphs. Unlike `kern`, adjustments made with `dist` do not require the
application or the user to enable any software _kerning_ features, if
such features are optional. 

In Myanmar text, `dist` is typically used to adjust the space around a
pre-base-reordering <samp>"Medial Ra"</samp>, because the <samp>"Medial Ra"</samp> codepoint is
classified as being of zero width, but is orthographically a glyph
that encloses the adjacent letter.

:::{figure-md}
![Application of the dist feature](/images/myanmar/myanmar-dist.svg "Application of the dist feature"){.shaping-demo .inline-svg .greyscale-svg #myanmar-dist}

Application of the dist feature
:::

```{svg-color-toggle-button} myanmar-dist
```


The `abvm` feature positions above-base glyphs for attachment to base
characters. In Myanmar, this includes <samp>"Kinzi"</samp> and <samp>"Asat"</samp> in addition
to tone markers, diacritical marks, above-base dependent vowels
(matras), and Vedic signs.

:::{figure-md}
![Application of the abvm feature](/images/myanmar/myanmar-abvm.svg "Application of the abvm feature"){.shaping-demo .inline-svg .greyscale-svg #myanmar-abvm}

Application of the abvm feature
:::

```{svg-color-toggle-button} myanmar-abvm
```


The `blwm` feature positions below-base glyphs for attachment to base
characters. In Myanmar, this includes subjoined consonants as well as
below-base dependent vowels (matras), medial consonants, tone markers,
diacritical marks, and Vedic signs.

:::{figure-md}
![Application of the blwm feature](/images/myanmar/myanmar-blwm.svg "Application of the blwm feature"){.shaping-demo .inline-svg .greyscale-svg #myanmar-blwm}

Application of the blwm feature
:::

```{svg-color-toggle-button} myanmar-blwm
```


The `mark` feature positions marks with respect to base glyphs.

:::{figure-md}
![Application of the mark feature](/images/myanmar/myanmar-mark.svg "Application of the mark feature"){.shaping-demo .inline-svg .greyscale-svg #myanmar-mark}

Application of the mark feature
:::

```{svg-color-toggle-button} myanmar-mark
```
 

The `mkmk` feature positions marks with respect to preceding marks,
providing proper positioning for sequences of marks that attach to the
same base glyph.

:::{figure-md}
![Application of the mkmk feature](/images/myanmar/myanmar-mkmk.svg "Application of the mkmk feature"){.shaping-demo .inline-svg .greyscale-svg #myanmar-mkmk}

Application of the mkmk feature
:::

```{svg-color-toggle-button} myanmar-mkmk
```


## The `<mymr>` shaping model ##

The older Myanmar script tag, `<mymr>`, has been deprecated. However,
shaping engines may still encounter fonts that were built to work with
`<mymr>` and some users may still have documents that were written to
take advantage of `<mymr>` shaping.

Sparse information is available about how the Microsoft Uniscribe
shaping engine treated `<mymr>` text runs. Documentation from the
HarfBuzz shaping engine suggests that the Uniscribe `<mymr>` shaper
did not perform a significant amount of reordering or application of
Indic-like <abbr title="Glyph Substitution table">GSUB</abbr> features.


================================================
FILE: opentype-shaping-nko.md
================================================
```{include} /_global.md
```

# N'Ko script shaping in OpenType #

This document details the general shaping procedure shared by all
N'Ko script styles, and defines the common pieces that style-specific
implementations share. 


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Joining properties](#joining-properties)
	  - [Mark classification](#mark-classification)
	  - [Character tables](#character-tables)
  - [The `<nko >` shaping model](#the-nko-shaping-model)
      - [Stage 1: Transient reordering of modifier combining marks](#stage-1-transient-reordering-of-modifier-combining-marks)
      - [Stage 2: Compound character composition and decomposition](#stage-2-compound-character-composition-and-decomposition)
      - [Stage 3: Computing letter joining states](#stage-3-computing-letter-joining-states)
      - [Stage 4: Applying the `stch` feature](#stage-4-applying-the-stch-feature)
      - [Stage 5: Applying the language-form substitution features from <abbr>GSUB</abbr>](#stage-5-applying-the-language-form-substitution-features-from-gsub)
      - [Stage 6: Applying the typographic-form substitution features from <abbr>GSUB</abbr>](#stage-6-applying-the-typographic-form-substitution-features-from-gsub)
      - [Stage 7: Applying the positioning features from <abbr>GPOS</abbr>](#stage-7-applying-the-positioning-features-from-gpos)
  

## General information ##

The N'Ko script is used to write multiple languages in the Manding
language family, most commonly Maninka, Dyula, and Bambara. 

The N'Ko script uses features and rules derived from those of the
Arabic script, and OpenType defines N'Ko shaping features with a
subset of the features used in [Arabic](opentype-shaping-arabic.md) shaping.
Consequently, a shaping engine can support N'Ko and Arabic with a
[single shaping model](opentype-shaping-arabic-general.md).

N'Ko is a joining script that uses inter-word spaces, so each
codepoint in a text run may be substituted with one of several
contextual forms corresponding to what, if any, characters appear
before and after the codepoint. Most, but not all, letter sequences
join; shaping engines must track which positions trigger joining
behavior for each letter. 

N'Ko is written (and, therefore, rendered) from right to
left. Shaping engines must track the directionality of the text run
when scripts of different direction are mixed.

The N'Ko script tag defined in OpenType is `<nko >`. Because OpenType
script tags must be exactly four letters long, the `<nko >` tag
includes a trailing space. 


## Terminology ##

OpenType shaping uses a standard set of terms for elements of the
N'Ko script. The terms used colloquially in any particular language
may vary, however, potentially causing confusion.

**Base** glyph or character is the standard term for a N'Ko
character that is capable of taking a diacritical mark. 

The base characters in N'Ko include both consonants and vowels.

**Kashida** (or **tatweel**) is the term for a glyph inserted into a
sequence for the purpose of elongating the baseline stroke of a
letter. Unicode documents use the term "tatweel" most frequently,
while OpenType documents use the term "kashida" most
frequently. Kashidas are typically inserted in order to justify lines
of text. 

In N'Ko, the kashida character is known as _lajanyalan_.


## Glyph classification ##

Because N'Ko is a joining (or cursive) script, proper shaping of
text runs involves identifying the joining behavior of each character,
then combining that information with any preceding or subsequent
characters to determine the contextually correct form for display.

### Joining properties ###

N'Ko characters are assigned a `JOINING_TYPE` property in the
Unicode standard that indicates how they join to adjacent
characters. There are six possible values: 

  - `JOINING_TYPE_LEFT` indicates that a character joins with
    the subsequent character, but does not join with the preceding
    character. 
	
  - `JOINING_TYPE_RIGHT` indicates that a character joins with the
    preceding character, but does not join with the subsequent character.	

  - `JOINING_TYPE_DUAL` indicates that a character joins with the
    preceding character and joins with the subsequent character.
	
  - `JOINING_TYPE_NON_JOINING` indicates that a character does not
    join with the preceding or with the subsequent character.
	
  - `JOINING_TYPE_TRANSPARENT` indicates that the character does not
    join with adjacent characters _and_ that the character must be
    skipped over when the shaping engine is evaluating the joining
    positions in a sequence of characters. When a
    `JOINING_TYPE_TRANSPARENT` character is encountered in a sequence,
    the `JOINING_TYPE` of the preceding character passes
    through. Diacritical marks are frequently assigned this value. 
	
  - `JOINING_TYPE_JOIN_CAUSING` indicates that the character forces
    the use of joining forms with the preceding and subsequent
    characters. Kashidas and the Zero Width Joiner (`U+200D`) are both
    `JOIN_CAUSING` characters.
  

In other scripts that use the general Arabic shaping model, letters
are also assigned to a `JOINING_GROUP` that indicates which
fundamental character they behave like with regard to joining
behavior.

Joining groups are not necessary in `<nko >` text shaping, so every
codepoint is assigned to the _null_ `JOINING_GROUP`.

### Mark classification ###

The Unicode standard defines a _canonical combining class_ for each
codepoint that is used whenever a sequence needs to be sorted into
canonical order. 

N'Ko marks all belong to standard combining classes:

:::{table} Mark-classification table

| Codepoint | Combining class | Glyph                              |
|:----------|:----------------|:-----------------------------------|
|           | 220             | Other below-base combining marks   |
|           | 230             | Other above-base combining marks   |
:::


The numeric values of these combining classes are used during Unicode
normalization.


These classifications are used in the [mark-transient-reordering
stage](#stage-1-transient-reordering-of-modifier-combining-marks).

			
### Character tables ###

Separate character tables are provided for the NKo block and for other miscellaneous
characters that are used in `<nko >` text runs:

  - [NKo character table](character-tables/character-tables-nko.md#nko-character-table)
  - [Miscellaneous character table](character-tables/character-tables-nko.md#miscellaneous-character-table)


The tables list each codepoint along with its Unicode general
category and its joining type. For letters, the table lists the
codepoint's joining group. For diacritical marks, the table lists the
codepoint's mark combining class. The codepoint's Unicode name and an example
glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Joining type | Joining group | Mark class | Glyph                        |
|:----------|:-----------------|:-------------|:--------------|:-----------|:-----------------------------|
|`U+07D3`   | Letter           | DUAL         | _null_        | _0_        | &#x07D3; Ba                  |
| | | | | |
|`U+07EB`   | Mark [Mn]        | TRANSPARENT  | _null_        | 230        | &#x07EB; Combining Short High Tone|
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of N'Ko text include the dotted-circle placeholder (`U+25CC`), the
combining grapheme joiner (`U+034F`), the zero-width joiner (`U+200D`)
and zero-width non-joiner (`U+200C`), the left-to-right text marker
(`U+200E`) and right-to-left text marker (`U+200F`), and the no-break
space (`U+00A0`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
combining mark in isolation. Real-world text documents may also use
other characters, such as hyphens or dashes, in a similar placeholder
fashion; shaping engines should cope with this situation gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (which is not common in N'Ko but can occur in
    other scripts)


The combining grapheme joiner (<abbr>CGJ</abbr>) is primarily used to alter the
order in which adjacent marks are positioned during the
mark-reordering stage, in order to adhere to the needs of a
non-default language orthography.

By default, OpenType shaping reorders sequences of adjacent marks by
sorting the sequence on the marks' Canonical_Combining_Class (<abbr>Ccc</abbr>)
values. The presence of a <abbr title="Combining Grapheme Joiner">CGJ</abbr> character within a sequence of marks has
the effect of splitting the sequence into two sequences of marks and,
therefore, halting any mark-reordering that would have occurred
between the marks on either side of the <abbr title="Combining Grapheme Joiner">CGJ</abbr>.

The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to force the usage of the
cursive connecting form of a letter even when the context of the
adjoining letters would not trigger the connecting form. 

For example, to show the initial form of a letter in isolation (such
as for displaying it in a table of forms), the sequence <samp>"_Letter_,ZWJ"</samp>
would be used. To show the medial form of a letter in isolation, the
sequence <samp>"ZWJ,_Letter_,ZWJ"</samp> would be used.

The zero-width non-joiner (<abbr>ZWNJ</abbr>) is primarily used to prevent a
cursive connection between two adjacent characters that would, under
normal circumstances, form a join. 

The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.


The right-to-left mark (<abbr>RLM</abbr>) and left-to-right mark (<abbr>LRM</abbr>) are used by
the Unicode bidirectionality algorithm (BiDi) to indicate the points
in a text run at which the writing direction changes. Generally
speaking, <abbr title="Right-to-Left Mark">RLM</abbr> and <abbr title="Left-to-Right Mark">LRM</abbr> codepoints do not interact with shaping.

The no-break space is primarily used to display those codepoints that
are defined as non-spacing (such as vowel or diacritical marks and "Hamza") in an
isolated context, as an alternative to displaying them superimposed on
the dotted-circle placeholder.


## The `<nko >` shaping model ##

Processing a run of `<nko >` text involves seven top-level stages:

1. Transient reordering of modifier combining marks
2. Compound character composition and decomposition
3. Computing letter joining states
4. Applying the `stch` feature
5. Applying the language-form substitution features from <abbr>GSUB</abbr>
6. Applying the typographic-form substitution features from <abbr>GSUB</abbr>
7. Applying the positioning features from <abbr>GPOS</abbr>


### Stage 1: Transient reordering of modifier combining marks ###

<!--- http://www.unicode.org/reports/tr53/tr53-1.pdf --->
> Note: because N'Ko does not feature the "Shadda" mark or any
> marks that belong to _Modifier Combining Marks_ (<abbr>MCM</abbr>) classes, this
> stage should not involve any additional work when processing
> `<nko >` text runs. It is included here to maintain consistency with
> other scripts that utilize the general Arabic-based shaping model.

Sequences of adjacent marks must be reordered so that they appear in
the appropriate visual order before the mark-to-base and mark-to-mark
positioning features from <abbr title="Glyph Positioning table">GPOS</abbr> can be correctly applied.

In particular, those marks that have strong affinity to the base
character must be placed closest to the base.

This mark-reordering operation is distinct from the standard,
cross-script mark-reordering performed during Unicode
normalization. The standard Unicode mark-reordering algorithm is based
on comparing the _Canonical_Combining_Class_ (<abbr>Ccc</abbr>) properties of mark
codepoints, whereas this script-specific reordering utilizes the
_Modifier_Combining_Mark_ (<abbr>MCM</abbr>) subclasses specified in the
character tables.

The algorithm for reordering a sequence of marks is:

  - First, move any <samp>"Shadda"</samp> (combining class `33`) characters to the
    beginning of the mark sequence.
	
  -	Second, move any subsequence of combining-class-`230` characters that begins
       with a `230_MCM` character to the beginning of the sequence,
       before all <samp>"Shadda"</samp> characters. The subsequence must be moved
       as a group.

  - Finally, move any subsequence of combining-class-`220` characters that begins
       with a `220_MCM` character to the beginning of the sequence,
       before all <samp>"Shadda"</samp> characters and before all class-`230`
       characters. The subsequence must be moved as a group.

> Note: Unicode describes this mark-reordering operation, the Arabic
> Mark Transient Reordering Algorithm (<abbr>AMTRA</abbr>), in Technical Report 53,
> which describes it in terms that are distinct from standard,
> <abbr>Ccc</abbr>-based mark reordering.
>
> Specifically, <abbr title="Arabic Mark Transient Reordering Algorithm">AMTRA</abbr> is designated as an operation performed during
> text rendering only, which therefore does not impact other
> Unicode-compliance issues such as allowable input sequences or text
> encoding.
>
> However, shaping engines may choose to perform the reordering of
> modifier combining marks in conjunction with their Unicode
> normalization functionality for increased efficiency.


### Stage 2: Compound character composition and decomposition ###

The `ccmp` feature allows a font to substitute

  - mark-and-base sequences with a pre-composed glyph including both
    the mark and the base (as is done with a ligature substitution)

  - individual compound glyphs with the equivalent sequence of
    decomposed glyphs (such as decomposing a letter with inherent
    marks into a separate fundamental-letter glyph followed by a
    marks-only glyph, to permit more precise positioning)
 
If present, these composition and decomposition substitutions must be
performed before applying any other <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups, because
those lookups may be written to match only the `ccmp`-substituted
glyphs. 


### Stage 3: Computing letter joining states ###

In order to correctly apply the initial, medial, and final form
substitutions from <abbr title="Glyph Substitution table">GSUB</abbr> during stage 5, the shaping engine must
tag every letter for possible application of the appropriate feature.

> Note: The following algorithm includes rules for processing `<syrc>`
> text in addition to `<nko >` text. Implementers concerned only with
> shaping `<nko >` text can omit the portions for `<syrc>`-specific
> rules. 

To determine which feature is appropriate, the shaping engine must
examine each word in turn and compute each letter's joining state from
the letter's `JOINING_TYPE` and the `JOINING_TYPE` of the
preceding character (if any).

> Note: Although N'Ko uses inter-word spaces, the `init` feature
> does _not_ refer to word-initial letters only and the `fina` feature
> does _not_ refer to word-final letters only.
>
> Rather, both of these terms are defined with respect to whether or
> not the preceding and subsequent letters form joins with the current
> letter. The letters at word boundaries will, naturally, take on
> initial and final forms, but initial and final forms of letters also
> occur regularly within words, when the letter in question is
> adjacent to a letter that does not form joins.

This computation starts from the first letter of the word, temporarily
tagging the letter for `isol` substitution. If the first
letter is the only letter in the word, the `isol` tag will remain unchanged.

From here, the algorithm consumes each character in the string, one at
a time, keeping track of the JOINING_TYPE of the previous character. 

If the current character is JOINING_TYPE_TRANSPARENT, move on to the next
character but preserve the currently-tracked JOINING_TYPE at its previous state.

If the preceding character's JOINING_TYPE is LEFT, DUAL, or
JOIN_CAUSING:
  - In `<syrc>` text, if the current character is <samp>"Alaph"</samp>, tag the
    current character for `med2`, then update the tag for the
    preceding character:
	  - `isol` becomes `init`
	  - `fina` becomes `medi`
	  - `init` remains `init`
	  - `medi` remains `medi`
  - If the current character's JOINING_TYPE is RIGHT, DUAL, or
    JOIN_CAUSING, tag the current character for `fina`, then update
    the tag for the preceding character:
	  - `isol` becomes `init`
	  - `fina` becomes `medi`
	  - `init` remains `init`
	  - `medi` remains `medi`

Otherwise, tag the current character for `isol`.

After testing the final character of the word, if the text is in `<syrc>` and
if the last character that is not JOINING_TYPE_TRANSPARENT or
JOINING_TYPE_NON_JOINING is <samp>"Alaph"</samp>, perform an additional test:
  - If the preceding character is JOINING_TYPE_LEFT, tag the current character
    for `fina`
  - If the preceding character's JOINING_GROUP is DALATH_RISH, tag the current
    character for `fin3`
  - Otherwise, tag the current character for `fin2`


Once the last character of the word has been processed, proceed to the
next word and repeat the algorithm, starting at the beginning of the
next word.

> Note: Because the processing of the characters in the algorithm
> described above is deterministic, shaping engines may choose to
> implement the joining-state computation as a state machine, in a lookup
> table, or by any other means desirable.


At the end of this process, all letters should be tagged for possible
substitution by one of the `isol`, `init`, `medi`, `med2`, `fina`, `fin2`, or
`fin3` features.

### Stage 4: Applying the `stch` feature ###

The `stch` feature decomposes and stretches special marks that are
meant to extend to the full width of words to which they are
attached. It was defined for use in `<syrc>` text runs for the "Syriac
Abbreviation Mark" (`U+070F`) but it can be used with similar marks in
other scripts.

> Note: N'Ko does not feature marks that require the `stch` feature;
> it is described here to maintain compatibility with other scripts
> that use the general Arabic shaping model.

To apply the `stch` feature, the shaping engine should first decompose the
`U+070F` glyph into components, which results in beginning-point,
midpoint, and endpoint glyphs plus one (or more) extension glyphs: at
least one extension between the beginning and midpoint glyphs and at
least one extension between the midpoint and endpoint glyphs. 

The shaping engine must then calculate the total length of the word to
which the mark applies. That length, minus the advance widths of the
beginning, middle, and endpoint glyphs of the mark, must be divided by
two. 

The result, divided by the advance width of the extension glyph
and rounded up to the next integer, tells the shaping engine how many
copies of the extension glyph must be placed between the midpoint and
each end of the mark.

Following this procedure ensures that the same number of extensions is
used on each side of the mark so that it remains symmetrical.

Finally, the decomposed mark must be reordered as follows: 

  - All of the glyphs in the sequence for the mark, _except_ for
    the final glyph, are repositioned as a group so that they precede
    the word to which the mark is attached.
  - The final glyph in the mark sequence is repositioned to the end of
    the word.
	

### Stage 5: Applying the language-form substitution features from <abbr>GSUB</abbr> ###

The language-substitution phase applies mandatory substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for
this stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

The order in which these substitutions must be performed is fixed for
all scripts implemented in the N'Ko shaping model:

	locl
	isol
	fina
	fin2 (not used in N'Ko)
	fin3 (not used in N'Ko)
	medi
	med2 (not used in N'Ko)
	init
	rlig (not used in N'Ko)
	rclt (not used in N'Ko)
	calt
	
> Note: `rlig` and `calt` need to be applied to the word as a whole before
> continuing to the next feature.

#### Stage 5, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

<!--- ![Localized form substitution](/images/nko/nko-locl.svg) --->


#### Stage 5, step 2: isol ####

The `isol` feature substitutes the default glyph for a codepoint with
the isolated form of the letter.

> Note: It is common for a font to use the isolated form of a letter
> as the default, in which case the `isol` feature would apply no
> substitutions. However, this is only a convention, and the active
> font may use other forms as the default glyphs for any or all
> codepoints.

<!--- ![Isolated form substitution](/images/nko/nko-isol.svg) --->


#### Stage 5, step 3: fina ####

The `fina` feature substitutes the default glyph for a codepoint with
the terminal (or final) form of the letter.

:::{figure-md}
![Final form substitution](/images/nko/nko-fina.svg "Final form substitution"){.shaping-demo .inline-svg .greyscale-svg #nko-fina}

Final form substitution
:::

```{svg-color-toggle-button} nko-fina
```


#### Stage 5, step 4: fin2 ####

This feature is not used in `<nko >` text.

#### Stage 5, step 5: fin3 ####

This feature is not used in `<nko >` text.

#### Stage 5, step 6: medi ####

The `medi` feature substitutes the default glyph for a codepoint with
the medial form of the letter.

:::{figure-md}
![Medial form substitution](/images/nko/nko-medi.svg "Medial form substitution"){.shaping-demo .inline-svg .greyscale-svg #nko-medi}

Medial form substitution
:::

```{svg-color-toggle-button} nko-medi
```


#### Stage 5, step 7: med2 ####

This feature is not used in `<nko >` text.

#### Stage 5, step 8: init ####

The `init` feature substitutes the default glyph for a codepoint with
the initial form of the letter.

:::{figure-md}
![Initial form substitution](/images/nko/nko-init.svg "Initial form substitution"){.shaping-demo .inline-svg .greyscale-svg #nko-init}

Initial form substitution
:::

```{svg-color-toggle-button} nko-init
```


#### Stage 5, step 9: rlig ####

This feature is not used in `<nko >` text.


#### Stage 5, step 10: rclt ####

This feature is not used in `<nko >` text.


#### Stage 5, step 11: calt ####

The `calt` feature substitutes glyphs with contextual alternate
forms. In general, this involves replacing the default form of a
connecting glyph with an alternate that provides a preferable
connection to an adjacent glyph.

The `calt` feature, in contrast to `rclt` above, performs
substitutions that are not mandatory for orthographic
correctness. Consequently, unlike `rclt`, the substitutions made by `calt`
can be disabled by application-level user interfaces.

<!--- ![Contextual alternate substitution](/images/nko/nko-calt.svg) --->


### Stage 6: Applying the typographic-form substitution features from <abbr>GSUB</abbr> ###

The typographic-substitution phase applies optional substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table.

The order in which these substitutions must be performed is fixed for
all scripts implemented in the N'Ko shaping model:

    liga
	dlig
	cswh (not used in N'Ko)
	mset (not used in N'Ko)
	

#### Stage 6, step 1: liga ####

The `liga` feature substitutes standard, optional ligatures that are on
by default. Substitutions made by `liga` may be disabled by
application-level user interfaces.

<!--- ![Standard ligature substitution](/images/nko/nko-liga.svg) --->


#### Stage 6, step 2: dlig ####

The `dlig` feature substitutes additional optional ligatures that are
off by default. Substitutions made by `dlig` may be disabled by
application-level user interfaces.


#### Stage 6, step 3: cswh ####

This feature is not used in `<nko >` text.


#### Stage 6, step 4: mset ####

This feature is not used in `<nko >` text.


### Stage 7: Applying the positioning features from <abbr>GPOS</abbr> ###

The positioning stage adjusts the positions of mark and base
glyphs.

The order in which these features are applied is fixed for
all scripts implemented in the Arabic shaping model:

    curs (not used in N'Ko)
	kern
	mark
	mkmk

#### 7.1 `curs` ####


This feature is not used in `<nko >` text.


#### 7.2 `kern` ####

The `kern` feature adjusts glyph spacing between pairs of adjacent glyphs.


#### 7.3 `mark` ####

The `mark` feature positions marks with respect to base glyphs.

:::{figure-md}
![Mark positioning](/images/nko/nko-mark.svg "Mark positioning"){.shaping-demo .inline-svg .greyscale-svg #nko-mark}

Mark positioning
:::

```{svg-color-toggle-button} nko-mark
```


#### 7.4 `mkmk` ####

The `mkmk` feature positions marks with respect to preceding marks,
providing proper positioning for sequences of marks that attach to the
same base glyph.


================================================
FILE: opentype-shaping-normalization.md
================================================
# Normalization in OpenType shaping #

## Unicode normalization ##

Unicode defines algorithms for normalizing a sequence of input
codepoints into either a canonical composed form or a canonical
decomposed form. The purpose of these algorithms and of the defined
normalization forms is to generate equivalent representations of input
sequences regardless of variations in the order of the input sequences.

For example, a base letter with an attached mark might exist in
Unicode as a single codepoint, but an input sequence might consist of
the base letter codepoint followed by the combining mark
codepoint. Unicode normalization can be used to determine that the
<samp>"Letter, Mark"</samp> sequence is equivalent to the single codepoint. This
simplifies sorting, searching, string comparison, and many other common
tasks.

OpenType shaping utilizes Unicode normalization, but OpenType
shaping has a distinctly different goal: to select the best or most
appropriate representation of the input codepoint sequence that is
available in the active font.


### Unicode equivalence and decomposition

Unicode defines two levels of _equivalence_: "canonical equivalence"
and "compatibility equivalence."

Both of these equivalence relationships are stored as
`Decomposition_Mapping` properties for codepoints in the Unicode
Character Database. In a canonical equivalence relationship, a
codepoint will have a `Decomposition_Mapping` that lists either one or
two other codepoints. In a compatibility equivalence relationship, a
codepoint will instead have a `Decomposition_Mapping` that starts with
a formatting tag which is followed by either one or two other
codepoints.

> Note: Decomposition mappings typically map one input codepoint to
> two output codepoints.
> 
> Decomposition mappings that produce one output codepoint are rare
> and are defined in order to handle particular, uncommon encoding
> circumstances. However, because such mappings exist, shaping engines
> should not assume that all decomposition mappings produce exactly
> two output codepoints.

For shaping purposes, canonical equivalence is generally of greatest
concern. Canonical equivalence defines that sequences such as
<samp>"Letter,Mark"</samp> (a standalone base character followed by a
combining-mark character) are to be treated the same as <samp>"Letter-with-mark"</samp> (a
codepoint that includes both the base and the mark).

The canonical `Decomposition_Mapping`s are required for Unicode
normalization and, even outside of the Unicode normalization
algorithm, help shaping engines make the correct matches between
codepoint sequences and glyphs.

Compatibility equivalence is more akin to defining fallback
relationships, such as defining that a superscript numeral has the
same underlying meaning as the full-size numeral. If the active font
has no glyph for the superscript numeral codepoint, any decision as to
whether substituting the full-size numeral glyph, artificially scaling
the full-size numeral glyph, or displaying a `.notdef` glyph is the 
desirable output is more likely to be a question left up to the
application layer or to the end user, rather than to be handled by the
shaping engine.

However, there may be compatibility equivalence relationships of
significant interest to shaping engines or to other components of a
text-rendering stack. For example, the Arabic Presentation Form
codepoints have defined compatibility equivalences that map each one
to a codepoint in the Arabic block. Therefore, this information can be
used to enable fallback support for shaping older documents that
include Arabic Presentation Form text runs.


### Unicode normalization forms

Unicode defines four "normalization forms," two of which are focused
on canonical equivalence and two of which are focused on compatibility
equivalence.

The canonical equivalence forms are:

  - Normalization Form D = `NFD`
    - All codepoints have gone through full, recursive canonical
      decomposition
  - Normalization Form C = `NFC`
    - All codepoints have gone through full, recursive canonical
      decomposition, followed by full canonical composition

The compatibility equivalence forms are:

  - Normalization Form KD = `NFKD`
    - All codepoints have gone through full, recursive canonical
      decomposition and full, recursive compatibility decomposition
  - Normalization Form KC = `NFKC`
    - All codepoints have gone through full, recursive canonical
      decomposition and full, recursive compatibility decomposition,
      followed by full canonical composition


### Unicode canonical combining classes

The Unicode `Canonical_Combining_Class` (`Ccc`) property holds a
numerical value for every codepoint. It can be used to sort sequences
into canonical order.

Base letters, other non-mark codepoints, and spacing mark codepoints
will have `Ccc` of `0`, meaning that the codepoint is unaffected by
the reordering algorithm.

Combining marks can have `Ccc` values from `1` to `254`. The
reordering algorithm sorts subsequences of adjacent marks into order
of increasing `Ccc` values.


### Unicode normalization algorithm

The general Unicode normalization algorithm is structured to produce
output in the user's preference between the four normalization
forms. So the steps performed vary based on whether the desired output
is to be in form `NFD`, `NFC`, `NFKD`, or `NFKC`.

> Note: The end goal of OpenType shaping normalization is not to
> produce these Unicode-specified normalization forms, but to produce
> the optimal rendered output. That is why a modified normalization
> algorithm, as described in the next section, is used for shaping
> text.

The general Unicode normalization algorithm applies to all text except
Hangul syllables. It involves three stages:

1. Full decomposition:
  - If `NFD` or `NFC` is the desired output, recursively apply
    canonical decomposition mappings
  - If `NFKD` or `NFKC` is the desired output, recursively apply
    canonical decomposition mappings followed by compatibility
    decomposition mappings

2. Canonical reordering:
  - Sort all subsequences that consist of `Ccc` &gt; `0` codepoints
    into order of increasing `Ccc` value

3. Recomposition, if desired:
  - If either `NFD` or `NFKD` is the desired output, stop.
  - If either `NFC` or `NFKC` is the desired output, apply canonical
    recomposition
   
Canonical recomposition segments the text run into chunks that begin
with <samp>"Starter"</samp> codepoints (which have `Ccc` = `0`) and progressively
tests the subsequent codepoints in the chunk, recombining them, in
order, with the starter whenever all of the following are true:
  - there is a canonical `Decomposition_Mapping` for the
    <samp>"Starter,Subsequent_codepoint"</samp> pair
  - the codepoint of the canonical `Decomposition_Mapping` does not
    have the `Composition_Exclusion` or `Full_Composition_Exclusion`
    properties
  - there are no characters of `Ccc` = `0`, or of a `Ccc` value greater
    than or equal to that of the subsequent codepoint, between the
    starter and the subsequent codepoint
	
In conceptual terms, the recomposition algorithm applies the reverse
of the decomposition mappings, except that the now-reordered sequence
may enable different pairings to match first.

The additional test conditions enable pairs to potentially match on
several decomposition mappings in a sequence where one base is
followed by several combining marks that attach at different
positions.

For example, in the fully decomposed and reordered sequence
<samp>"Letter,Mark_1,Mark_2"</samp>, if <samp>"Letter,Mark_1"</samp>
is not part of a canonical 
`Decomposition_Mapping` but <samp>"Letter,Mark_2"</samp> is part of a canonical
`Decomposition_Mapping`, then <samp>"Letter,Mark_2"</samp> will recombine into
<samp>"Letter-and-Mark_2"</samp>, followed by <samp>"Mark_1"</samp>.


### Unicode normalization for Hangul syllables

Hangul syllables can be algorithmically composed and decomposed
because of the strict jamo-ordering of the codepoints that make up the
Hangul Syllables block.

Shaping engines can use these algorithms to compose sequences of
individual jamo codepoints into precomposed-syllable codepoints, or to
compose individual jamo glyphs into a composite syllable when the
active font does not include a precomposed glyph for the required
syllable.

The algorithm used to normalize Hangul syllables is not related to the
Unicode normalization algorithm used for other scripts. The Hangul
algorithm is described in stage 2 of the [Hangul
shaping](opentype-shaping-hangul.md#stage-2-determining-if-the-syllable-can-be-composed-into-a-hangul-syllables-codepoint) document.


## OpenType shaping normalization ##

Normalization for OpenType shaping closely follows the Unicode
normalization model, but it takes place in the context of a known text
run and a specific active font.

As a result, OpenType shaping takes the text context and available
font contents into account, making decisions intended to result in the
best possible output to the shaping process.


### Goals ###

The OpenType shaping normalization algorithm also decomposes and
reorders the codepoints in a text run. But it differs from Unicode
normalization, particularly at the recomposition stage, in order to
offer the following features useful for shaping engines:

1. Different shaping models can request different preferred formats
   (composed or decomposed) as output
2. Individual decomposition and recomposition mappings will not be
   applied if doing so would result in a codepoint for which the
   active font does not provide a glyph
3. Additional decompositions and recompositions not included in
   Unicode are supported, including the decomposition of multi-part
   dependent vowels (matras) in several Indic and Brahmic-derived
   scripts as well as arbitrary decompositions and compositions
   implemented in `ccmp` and `locl` <abbr title="Glyph Substitution table">GSUB</abbr> lookups


### Shaping model preferences ###

Each shaping model supported by an OpenType shaping engine should
request its preferred normalization form: either fully composed or
fully decomposed.

> Note: in both cases, the preferred normalization form should be
> understood as considering only canonical decomposition mappings, not
> compatibility decomposition mappings.

Which form is preferred for the model primarily depends on the details
of the model, such as whether or not generic Unicode recomposition is
known to interfere with mark positioning, reordering, or other shaping
operations.

Complex shaping models, particularly those which may involve
reordering or the positioning of multi-part marks, tend to prefer
decomposed forms. Nevertheless, deciding which form is preferred for
which model is an implementation decision ultimately left up to the
shaping-engine implementor, who can take speed, complexity, and other
trade-offs into account.

The preferred form may also be specific to a language, such as when a
minority language employs different diacritic ordering than the
ordering encoded in Unicode's <abbr>Ccc</abbr> data. In this case, a font
targeting the minority language may be expected to handle
language-specific mark-to-mark positioning in <abbr title="Glyph Positioning table">GPOS</abbr>; as a result, the
shaping engine should allow for the positioning lookups by designating
a preference for decomposed forms.

Although a generic Unicode normalization implementation would target
the forms defined in Unicode (`NFD`, `NFC`, `NFKD`, or `NFKC`),
OpenType shaping preferred forms are not identical to these Unicode
forms and should not be advertised as being functionally equivalent.

Scripts and languages may also benefit from defining other preferred
forms beyond "fully decomposed" and "fully recomposed." For example,
it might be useful to define a preferred form in which all sequences
of marks are recomposed, but base-and-mark sequences are not
recomposed.


### OpenType shaping normalization algorithm ###

OpenType shaping normalization consists of four main stages.

1. Full decomposition
2. Canonical reordering
3. Selective recomposition
4. Applying font-specific normalization features

Distinctions from Unicode normalization at each stage are described
below.


#### Stage 1: Full decomposition ####

In the first stage, full `NFD` decomposition is performed, as in
Unicode normalization, except for a small set of exceptions required
by specific shapers:

  - recursively apply canonical decomposition mappings, except for:
      - Devanagari <samp>"Rra"</samp>
	  - Bengali <samp>"Rra"</samp> and <samp>"Rha"</samp>
	  - Tamil <samp>"Au"</samp>

After this decomposition, a second set of non-canonical and non-Unicode
mappings is applied:

  - Several scripts (including many covered in the Indic2 shaping
    model, as well as several other Brahmic-derived scripts) include
    multi-part dependent vowel (matra) characters that should be
    decomposed into multiple glyphs, so that those glyphs can be
    independently positioned around base letters.
	
	These additional decompositions are listed in the individual
	script-shaping documents.
	
  - Shaping engines implementing fallback support for older encodings
    should remap those older codepoints to their updated values.
    For example, a shaper that supports text using the Arabic
    Presentation Forms block should remap the Arabic Presentation
    Forms codepoints to the corresponding Arabic-block default
    codepoints and <abbr title="Glyph Substitution table">GSUB</abbr> positional features.
	
	These substitutions are defined in a set of Unicode compatibility
    decomposition mappings.

  - Certain punctuation and symbol codepoints should be remapped, such as
    remapping "non-breaking hyphen" codepoints to "hyphen".
  
Some of these additional decompositions and mappings may also be
implemented in an active font's <abbr title="Glyph Substitution table">GSUB</abbr> lookups, but that is not
guaranteed. Consequently, a normalization function must implement them
in order to fulfill the goal of providing stable output.


#### Stage 2: Canonical reordering ####

In the second stage, mark sequences are reordered into canonical
order:

  - Sort all subsequences that consist of `Ccc` &gt; `0` codepoints
    into order of increasing `Ccc` value

Several script-specific shapers require additional reordering to
compensate for limitations in the Unicode <abbr>Ccc</abbr> mark-reordering
model. For example, several Arabic mark sequences are reordered in
[stage 1](opentype-shaping-arabic.md#stage-1-transient-reordering-of-modifier-combining-marks) of the Arabic
shaping model and [stage 1](opentype-shaping-syriac.md#stage-1-transient-reordering-of-modifier-combining-marks)
of the Syriac shaping model.

These are listed briefly in stage 4, step 4, below, but full
discussion of each case can be found in each script's shaping
document.


#### Stage 3: Selective recomposition ####

The recomposition stage is selective and depends on the form requested
by the shaping model in use:

  - If the shaping model prefers composed forms, then proceed with
    recomposition as described in stage 3, step 1

  - If the shaping model prefers decomposed forms, then proceed with
    the recomposition as described in stage 3, step 2
	

##### Stage 3, step 1: Recomposition for composed-form preference #####

If composed forms have been requested, then proceed as in the Unicode
canonical recomposition algorithm: segment the text run into chunks
that begin with <samp>"Starter"</samp> codepoints (which have `Ccc` = `0`) and
progressively test the subsequent codepoints in the chunk,
recombining them, in order, with the starter whenever all of the
test conditions are met.

The following test conditions must be true:
  - there is a canonical `Decomposition_Mapping` for the
    <samp>"Starter,Subsequent_codepoint"</samp> pair 
  - the codepoint of the canonical `Decomposition_Mapping` does not
    have the `Composition_Exclusion` or `Full_Composition_Exclusion`
    properties
  - there are no characters of `Ccc` = `0`, or of a `Ccc` value greater
    than or equal to that of the subsequent codepoint, between the
    starter and the subsequent codepoint
  - the starter and the subsequent codepoint are not both of `Ccc` = `0`
  - the glyph that results from applying the recomposition exists in
    the active font


##### Stage 3, step 2: Recomposition for decomposed-form preference #####

If decomposed forms have been requested, then a simple check is
performed to cope with any decomposed forms that are absent in the
active font.

Segment the text run into chunks that begin with <samp>"Starter"</samp> codepoints
(which have `Ccc` = `0`) and progressively test the subsequent
codepoints in the chunk. 

- If there is no standalone glyph for the subsequent codepoint, but
  there is a `Decomposition_Mapping` for the
  <samp>"Starter,subsequent_codepoint"</samp> pair and a glyph exists for the
  recomposed codepoint, 
  then recombine the starter and the subsequent codepoint

<!---
HARFBUZZ logic here: https://github.com/harfbuzz/harfbuzz/src/hb-ot-shape-normalize.cc#L425
--->


#### Stage 4: Normalization-related <abbr title="Glyph Substitution table">GSUB</abbr> features and other font-specific considerations ####

After the decomposition, mark-reordering, and selective
recomposition stages, OpenType shaping normalization also takes
certain <abbr title="Glyph Substitution table">GSUB</abbr> lookups and complex-script shaping operations into
consideration.

These additional operations may produce final output that differs
from Unicode `NFD` and `NFC` forms. However, the output from stage
four should be identical for any two canonically-equivalent input
sequences in the same active font and script/language context.

> Note: the features discussed below are applied after the completion
> of the decomposition, mark-reordering, and recomposition
> stages. Furthermore, they are applied before any other <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr>
> features.
> 
> As a result, shaping engine implementors may choose to
> defer application of these features to the start of <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr>
> processing for the sake of convenience.

The `ccmp` and `locl` features can involve normalization, as described
below. If they are present in the active font and match the text run,
all `ccmp` and `locl` features should be applied, and should be
applied in the order in which they are listed in the <abbr title="Glyph Substitution table">GSUB</abbr> table.


##### Stage 4, step 1: ccmp features #####

The `ccmp` feature is applied to all text runs. `ccmp` lookups are not
meant to be disabled by end users in application code.

`ccmp` lookups can specify arbitrary decomposition mappings and
composition mappings, via one-to-many or many-to-one <abbr title="Glyph Substitution table">GSUB</abbr>
substitutions.

These lookups should be applied regardless of whether
they correspond to the expected decomposition and recomposition
mappings in Unicode, because `ccmp` is font-specific.

A common usage of `ccmp` is to decompose a single codepoint into two
or more glyphs representing discrete components, so that those
components can be more precisely positioned.

For example, many Arabic letters include ijam: dots that, while they
may visually resemble marks, are instead intrinsic components of the
letter and not diacritics. Because the ijam are not marks, a letter
with ijam does not decompose to separate Unicode codepoints. By
decomposing the letter into discrete base and ijam glyphs in `ccmp`, a
font can implement better contextual positioning of the ijam, and can
do so with considerably less work than including numerous alternate
glyphs.

<!--- comment from the HarfBuzz source code that I am not
      certain of the meaning of:
"When a font has a precomposed character for a sequence but the 'ccmp'
feature in the font is not adequate, use the precomposed character
which typically has better mark positioning."
--->


##### Stage 4, step 2: locl features #####

The `locl` feature is applied to text runs based on matching script
and language tags.

When the tags match, any lookups in `locl` are applied by default
during shaping, and these lookups are not meant to be disabled by
end users in application code.

`locl` lookups often implement simple one-to-one substitutions to
replace default glyph forms with alternate shapes preferred in the
language/script combination.

However, `locl` lookups may also interact with normalization by
performing decompositions or compositions. These substitutions are
often used to preserve orthographic or linguistic features that are
not fully captured by Unicode normalization forms or <abbr>Ccc</abbr> ordering.

For example, in the Turkish alphabet, "dotted i" and "dotless i" are
two distinct letters. For runs of text in Turkish, a font may
deliberately substitute a generic "i" glyph with "dotted i" or the "i,
dot diacritic" sequence with `locl` lookups in order to ensure that
the dot diacritic is not lost as text is processed.

Or, for example, in a particular script and language pairing, readers
might expect or prefer certain sequences of diacritics to stack in a
different order than the order their Unicode <abbr>Ccc</abbr> values dictate. A
`locl` lookup could be used to implement the preferred reordering in a
many-to-one <abbr title="Glyph Substitution table">GSUB</abbr> substitution.


##### Stage 4, step 3: Variation Selectors #####

Unicode defines _standardized variation sequences_ as sequences of two
codepoints where the first codepoint is any base character or mark,
and the second character is a Variation Selector. Mapping a
standardized variation sequence to a glyph is not done via <abbr title="Glyph Substitution table">GSUB</abbr>,
however, but in the `cmap` table of a font.

Unicode normalization does not consider Variation Selector
codepoints.

When performing OpenType shaping normalization, however, if the
<samp>"_letter_,Variation Selector"</samp> sequence is not mapped to a glyph in the active
font, a shaping engine may prefer to drop the Variation Selector
codepoint and render the default form of the character or to replace
the sequence with a `.notdef` glyph. Which option is preferred may be
language- or script-specific.


##### Stage 4, step 4: Interaction with script-specific shaping models #####

Reordering and composition are defined as shaping operations in
several script-specific shaping models. In some cases, a reordering
operation or composition may be designated by a particular <abbr title="Glyph Substitution table">GSUB</abbr> or
<abbr title="Glyph Positioning table">GPOS</abbr> feature tag.

Shaping-engine implementors should take care to note where completing
normalization early in the shaping process may reduce the need for
applying such operations later.

For example, in the Indic2 shaping model, sequences of marks are
reordered in stage 2, step 4. But this reordering is identical to the
Unicode canonical reordering, so a shaping-engine implementation that
normalizes all text runs before starting the Indic2 shaping process
will not need to perform any reordering at that step — assuming that
the Indic2 shaping model is configured to prefer decomposed forms.

Similarly, in stage 3, step 2 of the Indic2 shaping model, the `nukt`
feature composes <samp>"Base,Nukta"</samp> sequences into <samp>"Base-and-Nukta"</samp>
glyphs. A shaping engine that designates the Indic2 shaping model as
preferring composed forms could, therefore, have such <samp>"Base,Nukta"</samp>
sequences recomposed during Unicode normalization. However, such a
recomposition preference would likely cause other problems, such as
the unwanted recomposition of multi-part dependent vowels (matras).

Script-specific shaping models can also involve special exceptions to
the generic composition and reordering process of normalization. For
example:

  - In the Hebrew shaper, stage 2, Hebrew Alphabetic Presentation
    Forms, if available in the active font, are composed.

  - In the Arabic shaping model, stage 1, and in the Syriac shaping
    model, stage 1, certain marks are reordered after normalization
    and after <abbr title="Glyph Substitution table">GSUB</abbr> feature application.

  - In Bengali, <samp>"Ya,Nukta"</samp> is composed into <samp>"Yya"</samp> before <abbr title="Glyph Substitution table">GSUB</abbr> feature
    application, to avoid potential ambiguities during the application
    of later features.


#### Compatibility decompositions ####

As was mentioned in stage 1 of the OpenType shaping normalization
algorithm, the codepoints in the Arabic Presentation Forms blocks
have Unicode compatibility `Decomposition_Mapping`s that a shaping
engine can use to map codepoints from Arabic Presentation Forms to
codepoints in the Arabic block. Each Arabic Presentation Form
`Decomposition_Mapping` is tagged with a positional tag corresponding
to a positional <abbr title="Glyph Substitution table">GSUB</abbr> feature: `<final>`, `<initial>`, `<isolated>`, or
`<medial>`.

This tag information can be used to construct a set of synthetic <abbr title="Glyph Substitution table">GSUB</abbr>
lookups corresponding to `fina`, `init`, `isol`, and `medi`. However,
shaping engines should take care not to offer guarantees about the
expected output, unless explicit support for older files known to be
encoded with Arabic Presentation Forms codepoints is desired.

Similarly, several other compatibility `Decomposition_Mapping` tags
could theoretically be exploited to enable some level of fallback
support for shaping codepoints when the necessary glyphs are missing
in the active font, such as mapping `<fraction>` decompositions to
`frac`, `<super>` decompositions to `sups`,  `<sub>` to `subs` or
`sinf`, or `<compat>` to various generic list-item delimiter
sequences.

All such decompositions, however, should be implemented as
fallbacks and the decision to employ them is best left up to the
application layer or end user's preferences.


================================================
FILE: opentype-shaping-oriya.md
================================================
```{include} /_global.md
```

# Oriya shaping in OpenType #

This document details the shaping procedure needed to display text
runs in the Oriya script.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes and subclasses](#shaping-classes-and-subclasses)
      - [Oriya character tables](#oriya-character-tables)
  - [The `<ory2>` shaping model](#the-ory2-shaping-model)
      - [Stage 1: Identifying syllables and other sequences](#stage-1-identifying-syllables-and-other-sequences)
      - [Stage 2: Initial reordering](#stage-2-initial-reordering)
      - [Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr>](#stage-3-applying-the-basic-substitution-features-from-gsub)
      - [Stage 4: Final reordering](#stage-4-final-reordering)
      - [Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr>](#stage-5-applying-all-remaining-substitution-features-from-gsub)
      - [Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr>](#stage-6-applying-remaining-positioning-features-from-gpos)
  - [The `<orya>` shaping model](#the-orya-shaping-model)
      - [Distinctions from `<ory2>`](#distinctions-from-ory2)
      - [Advice for handling fonts with `<orya>` features only](#advice-for-handling-fonts-with-orya-features-only)
      - [Advice for handling text runs composed in `<orya>` format](#advice-for-handling-text-runs-composed-in-orya-format)


## General information ##

The Oriya or Odia script belongs to the Indic family, and follows
the same general patterns as the other Indic scripts. Oriya is
distinctive in some respects because it includes both some features
common to the North Indic subgroup and some features common to the
South Indic subgroup.

The Oriya script is used to write the Oriya (or Odia) language. In addition,
Sanskrit may be written in Oriya, so Oriya script runs may include
glyphs from the Vedic Extensions block of Unicode. 

There are two extant Oriya script tags defined in OpenType, `<orya>`
and `<ory2>`. The older script tag, `<orya>`, was deprecated in 2005.
Therefore, new fonts should be engineered to work with the `<ory2>`
shaping model. However, if a font is encountered that supports only
`<orya>`, the shaping engine should deal with it gracefully.

## Terminology ##

OpenType shaping uses a standard set of terms for Indic scripts.  The
terms used colloquially in any particular language may vary, however,
potentially causing confusion.

**Matra** is the standard term for a dependent vowel sign. 

**Halant** and **Virama** are both standard terms for the below-base "vowel-killer"
mark. Unicode documents use the term "virama" most frequently, while
OpenType documents use the term "halant" most frequently. In the Oriya
language, this sign is known as the _halanta_.

**Chandrabindu** (or simply **Bindu**) is the standard term for the diacritical mark
indicating that the preceding vowel should be nasalized. In the Oriya
language, this mark is known as the _candrabindu_.

The term **base consonant** is also critical to Indic shaping. The
base consonant of a syllable is the consonant that carries the
syllable's vowel sound, either the inherent vowel (for an unmarked
base consonant) or a dependent vowel (with the addition of a matra).

Different <abbr title="Glyph Substitution table">GSUB</abbr>
substitutions may apply to a script's **pre-base** and **post-base**
consonants. Some of these substitutions create **above-base** or
**below-base** forms. The **Reph** form of the consonant "Ra" is an
example.

Syllables may also begin with an **independent vowel** instead of a
consonant. In these syllables, the independent vowel is rendered in
full-letter form, not as a matra, and the independent vowel serves as the
syllable base, similar to a base consonant.

Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.

## Glyph classification ##

Shaping Oriya text depends on the shaping engine correctly
classifying each glyph in the run. As with most other scripts, the
classifications must distinguish between consonants, vowels
(independent and dependent), numerals, punctuation, and various types
of diacritical mark.

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not sufficient to fully capture the
expected shaping behavior (such as glyph reordering). Therefore,
Oriya glyphs must additionally be classified by how they are treated
when shaping a run of text.

### Shaping classes and subclasses ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Indic scripts. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Several of the diacritic and syllable-modifying marks behave according
to their own rules and, thus, have a special class. These include
`BINDU`, `VISARGA`, `AVAGRAHA`, `NUKTA`, and `VIRAMA`. Some
less-common marks behave according to rules that are similar to these
common marks, and are therefore classified with the corresponding
common mark. The Vedic Extensions also include a `CANTILLATION`
class for tone marks.

Letters generally fall into the classes `CONSONANT`,
`VOWEL_INDEPENDENT`, and `VOWEL_DEPENDENT`. These classes help the
shaping engine parse and identify key positions in a syllable. For
example, Unicode categorizes dependent vowels as `Mark [Mn]`, but the
shaping engine must be able to distinguish between dependent vowels
and diacritical marks (which are also categorized as `Mark [Mn]`).

Other characters, such as symbols and miscellaneous letters (for
example, letter-like symbols that only occur as standalone entities
and do not occur within syllables), need no special attention from the
shaping engine, so they are not assigned a shaping class.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType features that
might affect how the respective glyphs are drawn, such as `tnum`,
which specifies the usage of tabular-width numerals, and `sups`, which
replaces the default glyphs with superscript variants.

Marks and dependent vowels are further labeled with a mark-placement
subclass, which indicates where the glyph will be placed with respect
to the base character to which it is attached. The actual position of
the glyphs is determined by the lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr>
table; however, the shaping rules for Indic scripts require that the
shaping engine be able to identify marks by their general
position. 

For example, left-side dependent vowels (matras), classified
with `LEFT_POSITION`, must frequently be reordered, with the final
position determined by whether or not other letters in the syllable
have formed ligatures or combined into conjunct forms. Therefore, the
`LEFT_POSITION` subclass of the character must be tracked throughout
the shaping process.

There are four basic _mark-placement subclasses_ for dependent vowels
(matras). Each corresponds to the visual position of the matra with
respect to the syllable base to which it is attached:

  - `LEFT_POSITION` matras are positioned to the left of the syllable base.
  - `RIGHT_POSITION` matras are positioned to the right of the syllable base.
  - `TOP_POSITION` matras are positioned above the syllable base.
  - `BOTTOM_POSITION` matras are positioned below the syllable base.
  
These positions may also be referred to elsewhere in shaping documents as:

  - _Pre-base_ matras
  - _Post-base_ matras
  - _Above-base_ matras
  - _Below-base_ matras
  
respectively. The `LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

In addition, dependent-vowel codepoints that are composed of multiple
components will be designated in character tables as having a compound
_mark-placement subclass_, such as `TOP_AND_RIGHT` or `LEFT_AND_RIGHT`. 

However, these multi-part matras are decomposed into separate matra
components during the shaping process. After the decomposition, each
matra component will belong to exactly one of the four basic
_mark-placement subclasses_.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there are some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 

### Oriya character tables ###

Separate character tables are provided for the Oriya and Vedic
Extensions blocks as well as for other miscellaneous characters that
are used in `<ory2>` text runs:

  - [Oriya character table](character-tables/character-tables-oriya.md#oriya-character-table)
  - [Vedic Extensions character table](character-tables/character-tables-oriya.md#vedic-extensions-character-table)
  - [Miscellaneous character table](character-tables/character-tables-oriya.md#miscellaneous-character-table)

The tables list each codepoint along with its Unicode general
category, its shaping class, and its mark-placement subclass. The
codepoint's Unicode name and an example glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0B01`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0B01; Candrabindu         |
| | | | |
|`U+0B15`   | Letter           | CONSONANT         | _null_                     | &#x0B15; Ka                  |
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine.

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the tables use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of Oriya text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (such as <samp>"Reph"</samp>)


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a <samp>"_Consonant_,Halant,_Consonant_"</samp> sequence.

  - The sequence <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> blocks the
    formation of a conjunct between the two consonants. 

Note, however, that the <samp>"_Consonant_,Halant"</samp> subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead.

  - The sequence <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> should produce
    the first consonant in its standard form, followed by an explicit
    <samp>"Halant"</samp>. 

A secondary usage of the zero-width joiner is to prevent the formation of
<samp>"Reph"</samp>.

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence should not produce a <samp>"Reph"</samp>,
    even where an initial <samp>"Ra,Halant"</samp> sequence without the zero-width
    joiner would otherwise produce a <samp>"Reph"</samp>.

The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.

For example:

  - A lookup that substitutes an alternate version of a
    dependent-vowel (matra) glyph when it is preceded by <samp>"Ka,Halant,Tta"</samp>
    should still be applied if the dependent-vowel codepoint is preceded
    by <samp>"Ka,Halant,ZWJ,Tta"</samp> in the text run.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match <samp>"NBSP,ZWJ,Halant,_Consonant_"</samp>, <samp>"NBSP,_mark_"</samp>, or <samp>"NBSP,_matra_"</samp>.

In addition to general punctuation, runs of Oriya text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block.


## The `<ory2>` shaping model ##

Processing a run of `<ory2>` text involves six top-level stages:

1. Identifying syllables and other sequences
2. Initial reordering
3. Applying the basic substitution features from <abbr>GSUB</abbr>
4. Final reordering
5. Applying all remaining substitution features from <abbr>GSUB</abbr>
6. Applying all remaining positioning features from <abbr>GPOS</abbr>


As with other Indic scripts, the initial reordering stage and the
final reordering stage each involve applying a set of several
script-specific rules. The basic substitution features must be applied
to the run in a specific order. The remaining substitution features in
stage five, however, do not have a mandatory order.

Indic scripts follow many of the same shaping patterns, but they
differ in a few critical characteristics that the shaping engine must
track. These include:

  - The position of the base consonant in a syllable.
  
  - The final position of <samp>"Reph"</samp>.
  
  - Whether <samp>"Reph"</samp> must be requested explicitly or if it is formed by
    a specific, implicit sequence.
	
  - Whether the below-base forms feature is applied only to consonants
    before the syllable base, only to consonants after the base
    consonant, or to both.
	
  - The ordering positions for dependent vowels
    (matras). Specifically, right-side, above-base, and below-base
    matras follow different rules in different scripts. 
	All Indic scripts position left-side matras in the same
    manner, in the ordering position `POS_PREBASE_MATRA`. 

With regard to these common variations, Oriya's specific shaping
characteristics include:

  - `BASE_POS_LAST` = The base consonant of a syllable is the last
     consonant, not counting any special final-consonant forms.

  - `REPH_POS_AFTER_MAIN` = <samp>"Reph"</samp> is ordered immediately after the
     base consonant or syllable base.

  - `REPH_MODE_IMPLICIT` = <samp>"Reph"</samp> is formed by an initial <samp>"Ra,Halant"</samp> sequence.

  - `BLWF_MODE_PRE_AND_POST` = The below-base forms feature is applied both to
     pre-base consonants and to post-base consonants.

  - `MATRA_POS_TOP` = `POS_AFTER_MAIN`  = Above-base matras are
    ordered immediately after the base consonant or syllable base.

  - `MATRA_POS_RIGHT` = `POS_AFTER_POST` = Right-side matras are
     ordered after all post-base consonant forms.

  - `MATRA_POS_BOTTOM` = `POS_AFTER_SUBJOINED` = Below-base matras are
     ordered after all subjoined (i.e., below-base) consonant forms.

These characteristics determine how the shaping engine must reorder
certain glyphs, how base consonants are determined, and how <samp>"Reph"</samp>
should be encoded within a run of text.


### Stage 1: Identifying syllables and other sequences ###

A syllable in Oriya consists of a valid orthographic sequence
that may be followed by a "tail" of modifier signs. 

> Note: The Oriya Unicode block enumerates four modifier signs,
> "Candrabindu" (`U+0B01`), "Anusvara" (`U+0B02`), "Visarga" 
> (`U+0B03`), and "Avagraha" (`U+0B3D`). In addition, Sanskrit text
> written in Oriya may include additional signs from the Vedic Extensions
> block.

Each syllable contains exactly one vowel sound. Valid syllables may
begin with either a consonant or an independent vowel. 

If the syllable begins with a consonant, then the consonant that
provides the vowel sound is referred to as the "base" consonant. If
the syllable begins with an independent vowel, that vowel is the
syllable's only vowel sound and, by definition, there is no "base"
consonant. 

> Note: A consonant that is not accompanied by a dependent vowel (matra) sign
> carries the script's inherent vowel sound. This vowel sound is changed
> by a dependent vowel (matra) sign following the consonant.

From the shaping engine's perspective, the main distinction between a
syllable with a base consonant and a syllable with an
independent-vowel base is that a syllable with an independent-vowel
base is less likely to include additional consonants in special forms
and less likely to include dependent vowel signs
(matras). Therefore, in the common case, vowel-based syllables may
involve less reordering, substitution feature applications, and other
processing than consonant-based syllables.

In some languages and orthographies, vowel-based syllables are
not permitted to include additional consonants or matras, and certain
<abbr title="Glyph Substitution table">GSUB</abbr> substitution features do not occur. However, there are often
known exceptions, and real-world text makes no such guarantees. 

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

Generally speaking, the base consonant is the final consonant of the
syllable and its vowel sound designates the end of the syllable. This
rule is synonymous with the `BASE_POS_LAST` characteristic mentioned
earlier. 

Non-base consonants in a valid syllable will be separated by <samp>"Halant"</samp>
marks. Pre-base consonants will be followed by <samp>"Halant"</samp>, while
post-base consonants will be preceded by <samp>"Halant"</samp>.

	Pre-baseC Halant BaseC Halant Post-baseC
	
The algorithm for correctly identifying the base consonant includes a
test to recognize these sequences and not mis-identify the base
consonant.

All consonants in Oriya can potentially occur in pre-base
position. The <samp>"Halant"</samp> marks on pre-base consonants indicate that they
carry no vowel. Instead, they affect syllable pronunciation by
combining with the base consonant (e.g., "_thr_" or "_spl_").

Two consonants in Oriya are allowed to occur in post-base
position: <samp>"Ya"</samp> and <samp>"Yya"</samp>.

Oriya consonants take on a variety of different forms in
consonant conjuncts. In some cases, the base consonant takes a
below-base or mark-like form.

<!--- 
KA (0B15), JA (0B1C), NA (0B28), BA (0B2C), WA (0B35), LA (0B32), and
LLA (0B33) are presented in their half-forms.

TA (0B24), DDHA (0B22), THA (0B25),
CHA (0B1B), BHA (0B2D), MA (0B2E), and NNA (0B23) are rendered as
consonant signs placed below consonant letters. These signs retain the
inherent vowel A. 

Only the sign representing YYA (0B5F) is positioned
to the right of a consonant.

Some consonant in Oriya are rendered as consonant signs when they
function as part of a consonant cluster. These signs do not have
visual similarity with the consonants they represent.

KA      +     TA	↝ K.TA
La + Halant	 +	Ta	↝ ¦
DA	 +      MA	↝ D.MA
]ç	 +	c	↝ ]ê

Such consonant clusters may function as consonant and can further take
other consonant as /ma:tra:/ matras. For example,

TA	+	SA	↝ T.SA	+	NA	↝ T.S.NA
[ç	+	j	↝ júç		+	_	↝ júð

Diminutive form of consonants: A diminutive form of consonant is used
as the final component of a consonant cluster. Such diminutive forms
retain the inherent vowel A and are positioned below the relevant
consonant.

GA	+	DHA	↝ G.DHA
Nç	+	^	↝ ‘
SHA	+	CA	↝ SH.CA
hç	+	Q	↝ ¾


From info at http://www.ciil-lisindia.net/oriya/oriya.html
https://web.archive.org/web/20150304085123/http://www.ciil-lisindia.net:80/Oriya/Oriya.html
--->


As with other Indic scripts, the consonant <samp>"Ra"</samp> receives special
treatment; in many circumstances it is replaced by a combining
mark-like form. 

  - A <samp>"Ra,Halant"</samp> sequence at the beginning of a syllable is replaced
    with an above-base mark called <samp>"Reph"</samp> (unless the <samp>"Ra"</samp> is the only
    consonant in the syllable). This rule is synonymous with the
    `REPH_MODE_IMPLICIT` characteristic mentioned earlier.
  - A non-initial <samp>"Halant,Ra"</samp> sequence is replaced with a
    below-base mark called <samp>"Raphala"</samp>.
  
<samp>"Reph"</samp> and <samp>"Raphala"</samp> characters must be reordered after the
syllable-identification stage is complete. 

> Note: Generally speaking, OpenType fonts will implement support for
> any below-base, post-base, and pre-base-reordering consonant forms
> by including the necessary substitution rules in their `blwf`,
> `pstf`, and `pref` lookups in <abbr title="Glyph Substitution table">GSUB</abbr>.
>
> Consequently, whenever shaping engines need to determine whether or 
> not a given consonant can take on such a special form, the most
> appropriate test is to check if the consonant is included in the
> relevant <abbr title="Glyph Substitution table">GSUB</abbr> lookup. Other implementations are possible, such as
> maintaining static tables of consonants, but checking for <abbr title="Glyph Substitution table">GSUB</abbr>
> support ensures that the expected behavior is implemented in the
> active font, and is therefore the most reliable approach.


In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.

> Note: Foreign loanwords, when written in the Oriya script, may
> not adhere to the syllable-formation rules described above. In
> particular, it is not uncommon to encounter foreign loanwords that
> contain a word-final suffix of consonants.
>
> Nevertheless, such word-final suffixes will be correctly matched by
> the regular expressions listed below. These loanwords are pronounced
> differently, which raises issues for potential readers, but the
> character sequences do not affect the shaping process.


Syllables should be identified by examining the run and matching
glyphs, based on their categorization, using regular expressions. 

The following general-purpose Indic-shaping regular expressions can be
used to match Oriya syllables.

The regular expressions utilize the shaping classes from the tables
above. For the purpose of syllable identification, more general
classes can be used, as defined in the following table. This
simplifies the resulting expressions. 

```markdown
_ra_		= The consonant "Ra" 
_consonant_	= ( `CONSONANT` | `CONSONANT_DEAD` ) - _ra_
_vowel_		= `VOWEL_INDEPENDENT`
_nukta_	  	= `NUKTA`
_halant_	= `VIRAMA`
_zwj_		= `JOINER`
_zwnj_		= `NON_JOINER`
_matra_		= `VOWEL_DEPENDENT` | `PURE_KILLER`
_syllablemodifier_	= `SYLLABLE_MODIFIER` | `BINDU` | `VISARGA` | `GEMINATION_MARK`
_vedicsign_	= `CANTILLATION`
_placeholder_	= `PLACEHOLDER` | `CONSONANT_PLACEHOLDER` | `NUMBER`
_dottedcircle_	= `DOTTED_CIRCLE`
_repha_		= `CONSONANT_PRE_REPHA`
_consonantmedial_	= `CONSONANT_MEDIAL`
_symbol_	= `SYMBOL` | `AVAGRAHA`
_consonantwithstacker_	= `CONSONANT_WITH_STACKER`
_other_		= `OTHER` | `MODIFYING_LETTER`
```


> Note: the _ra_ identification class is mutually exclusive with 
> the _consonant_ class. The union of the _consonant_ and _ra_ classes
> is used in the regular expression elements below in order to
> correctly identify <samp>"Ra"</samp> characters that do not trigger <samp>"Reph"</samp> or
> <samp>"Rakaar"</samp> shaping behavior.
>
> Note, also, that the cantillation mark "combining Ra" in the
> Devanagari Extended block does _not_ belong to the _ra_
> identification class, and that the other "combining consonant"
> cantillation marks in the Devanagari Extended block do not belong to
> the _consonant_ identification class.

> Note: The _placeholder_ identification class includes codepoints
> that are often used in place of vowels or consonants when a document
> needs to display a matra, mark, or special form in isolation or
> in another context beyond a standard syllable. Examples of
> _placeholder_ codepoints include hyphens and non-breaking
> spaces. Sequences that utilize this approach should be identified as
> "standalone" syllables.
>
> The _placeholder_ identification class also includes numerals, which
> are commonly used as word substitutes within normal text. Examples
> include ordinals (e.g., "4th").

> Note: The _other_ identification class includes codepoints that
> do not interact with adjacent characters for shaping purposes. Even
> though some of these codepoints (such as `MODIFYING_LETTER`) can
> occur within words, they evoke no behavior from the shaping
> engine and do not factor into the regular expressions that
> follow. Therefore, the shaping engine may choose to ignore them
> during syllable identification; they are listed here for completeness.

These identification classes form the bases of the following regular
expression elements:

```markdown
C	= _consonant_ | _ra_
Z	= _zwj_ | _zwnj_
REPH	= (_ra_ _halant_) | _repha_
CN		= C _zwj_? _nukta_?
FORCED_RAKAR	= _zwj_ _halant_ _zwj_ _ra_
S	= _symbol_ _nukta_?
MATRA_GROUP	= Z{0,3} _matra_ _nukta_? (_halant_ | FORCED_RAKAR)?
SYLLABLE_TAIL	= (Z? _syllablemodifier_ _syllablemodifier_? _zwnj_?)? _vedicsign_{0,3}
HALANT_GROUP	= Z? _halant_ (_zwj_ _nukta_?)?
FINAL_HALANT_GROUP	= HALANT_GROUP | (_halant_ _zwnj_)
MEDIAL_GROUP	= _consonantmedial_?
HALANT_OR_MATRA_GROUP	= (FINAL_HALANT_GROUP | MATRA_GROUP*)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(MATRA_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(MATRA_GROUP){0,4}` .


Using the above elements, the following regular expressions define the
possible syllable types:

A consonant-based syllable will match the expression:
```markdown
(_repha_|_consonantwithstacker_)? (CN HALANT_GROUP)* CN MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(CN HALANT_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(CN HALANT_GROUP){0,4}` .

A vowel-based syllable will match the expression:
```markdown
REPH? _vowel_ _nukta_? (_zwj_ | (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

A standalone syllable will match the expression:
```markdown
((_repha_|_consonantwithstacker_)? _placeholder_ | REPH? _dottedcircle_) _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

> Note: Although they are labeled as "standalone syllables" here,
> many sequences that match the standalone regular expression above
> are instances where a document needs to display a matra, combining
> mark, or special form in isolation. Such sequences might not have
> any significance with regard to the definition of syllables used in
> the language or orthography of the text.

A symbol-based syllable will match the expression:
```markdown
S SYLLABLE_TAIL
```

A broken syllable will match the expression:
```markdown
REPH? _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .


The primary problem involved in shaping broken syllables is the lack
of a syllable base (either a base consonant or an independent
vowel). Without a syllable base, the shaping engine cannot perform
<abbr title="Glyph Positioning table">GPOS</abbr> positioning and other contextual operations that are required
later in the shaping process.

To make up for this limitation, shaping engines should insert a
dotted-circle placeholder (`U+25CC`) character into the text stream
where the missing syllable base was expected to occur. This
placeholder allows the shaping process to proceed on a best-effort
basis at handling the broken-syllable sequence, but making guarantees
about the orthographic correctness or preferred appearance of the
final result is out of scope for this document.

Shaping engines can perform this dotted-circle insertion at any point
after the broken syllable has been recognized and before <abbr title="Glyph Substitution table">GSUB</abbr> features
are applied. However, the best results will likely be attained by
performing the insertion immediately, before proceeding to
stage 2. This will enable the maximum number of <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features
in the active font to be correctly applied to the text run by ensuring
that all reordering, tagging, and sorting algorithms are executed as
usual.

> Note: In software stacks where other text-handling operations, such
> as Unicode normalization and localization, are performed before the
> text run is passed to the shaping engine, there is a potential for
> the dotted-circle insertion to cause unexpected effects.
>
> For example, if a `ccmp` or `locl` feature substitutes the default
> dotted-circle placeholder glyph with a variant glyph of a different
> size or weight for the (`U+25CC`) codepoint, then any shaping engine
> which relies on another software component to handle that
> functionality must take additional care to ensure consistency.


The expressions above use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


After the syllables have been identified, each of the subsequent 
shaping stages occurs on a per-syllable basis.

### Stage 2: Initial reordering ###

The initial reordering stage is used to relocate glyphs from the
phonetic order in which they occur in a run of text to the
orthographic order in which they are presented visually.

> Note: Primarily, this means moving dependent-vowel (matra) glyphs, 
> <samp>"Ra,Halant"</samp> glyph sequences, and other consonants that take special
> treatment in some circumstances. 
>
> These reordering moves are mandatory. The final-reordering stage
> may make additional moves, depending on the text and on the features
> implemented in the active font.

The syllable should be processed by tagging each glyph with its
intended position based on its ordering category. After all glyphs
have been tagged, the entire syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.

The final sort order of the ordering categories should be:


	POS_RA_TO_BECOME_REPH
	POS_PREBASE_MATRA
	POS_PREBASE_CONSONANT

	POS_SYLLABLE_BASE
	POS_AFTER_MAIN

	POS_ABOVEBASE_CONSONANT

	POS_BEFORE_SUBJOINED
	POS_BELOWBASE_CONSONANT
	POS_AFTER_SUBJOINED

	POS_BEFORE_POST
	POS_POSTBASE_CONSONANT
	POS_AFTER_POST

	POS_FINAL_CONSONANT
	POS_SMVD


This sort order enumerates all of the possible final positions to
which a codepoint might be reordered, across all of the Indic
scripts. It includes some ordering categories not utilized in
Oriya. 

The basic positions (left to right) are <samp>"Reph"</samp>
(`POS_RA_TO_BECOME_REPH`), dependent vowels (matras) and consonants
positioned before the base consonant or syllable base
(`POS_PREBASE_MATRA` and `POS_PREBASE_CONSONANT`), the base consonant
or syllable base (`POS_SYLLABLE_BASE`), above-base consonants
(`POS_ABOVEBASE_CONSONANT`), below-base consonants
(`POS_BELOWBASE_CONSONANT`), consonants positioned after the base
consonant or syllable base (`POS_POSTBASE_CONSONANT`), syllable-final
consonants (`POS_FINAL_CONSONANT`), and syllable-modifying or Vedic
signs (`POS_SMVD`).

In addition, several secondary positions are defined to handle various
reordering rules that deal with relative, rather than absolute,
positioning. `POS_AFTER_MAIN` means that a character must be
positioned immediately after the syllable base. `POS_BEFORE_SUBJOINED`
and `POS_AFTER_SUBJOINED` mean that a character must be positioned
before or after any below-base consonants, respectively. Similarly,
`POS_BEFORE_POST` and `POS_AFTER_POST` mean that a character must be
positioned before or after any post-base consonants, respectively. 

For shaping-engine implementers, the names used for the ordering
categories matter only in that they are unambiguous. 

For a definition of the "base" consonant, refer to stage 2, step 1, which
follows.

#### Stage 2, step 1: Base consonant ####

The first step is to determine the base consonant of the syllable, if
there is one, and tag it as `POS_SYLLABLE_BASE`.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base, and it should be tagged
as `POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a standalone sequence or other syllable that begins with a placeholder
or dotted circle, the placeholder or dotted circle will always serve
as the syllable base, and it should be tagged as
`POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a syllable that begins with a consonant, the shaping engine must
determine the base consonant by a script-specific algorithm.

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

The base consonant is defined as the consonant in a consonant-based
syllable that carries the syllable's vowel sound. That vowel sound
will either be provided by the script's inherent vowel (in which case
it is not written with a separate character) or the sound will be designated
by the addition of a dependent-vowel (matra) sign.


<!--- > Because vowel-based syllables will not include consonants and
> because independent vowels do not take on special forms or require
> reordering, many of the steps that follow will involve no
> work for a vowel-based syllable. However, vowel-based syllables must
> still be sorted and their marks handled correctly, and <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr>
> lookups must be applied. These steps of the shaping process follow
> the same rules that are employed for consonant-based syllables.
--->

While performing the base-consonant search, shaping engines may
also encounter special-form consonants, including below-base
consonants and post-base consonants. Each of these special-form
consonants must also be tagged (`POS_BELOWBASE_CONSONANT`,
`POS_POSTBASE_CONSONANT`, respectively). 

Any pre-base-reordering consonant (such as a pre-base-reordering <samp>"Ra"</samp>)
encountered during the base-consonant search must be tagged
`POS_POSTBASE_CONSONANT`. 
 
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.


The algorithm for determining the base consonant is:

  - If the syllable starts with <samp>"Ra,Halant"</samp> and the syllable contains
    more than one consonant, exclude the starting <samp>"Ra"</samp> from the list of
    consonants to be considered. 
  - Starting from the end of the syllable, move backwards until a consonant is found.
      * If the consonant is the first consonant, stop.
      * If the consonant is preceded by the sequence <samp>"Halant,ZWJ"</samp>, stop.
      * If the consonant has a below-base form, tag it as
        `POS_BELOWBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant has a post-base form, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant is a pre-base-reordering <samp>"Ra"</samp>, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If none of the above conditions is true, stop.
  - The consonant stopped at will be the base consonant.

> Note: The algorithm is designed to work for all Indic
> scripts. However, Oriya does not utilize pre-base-reordering <samp>"Ra"</samp>.

Oriya includes two consonants that can take on post-base forms, `Ya` and `Yya`.

:::{figure-md}
![Post-base consonant Ya](/images/oriya/oriya-pstf-ya.svg "Post-base consonant Ya"){.shaping-demo .inline-svg .greyscale-svg #oriya-pstf-ya}

Post-base consonant Ya
:::

```{svg-color-toggle-button} oriya-pstf-ya
```


:::{figure-md}
![Post-base consonant Yya](/images/oriya/oriya-pstf-yya.svg "Post-base consonant Yya"){.shaping-demo .inline-svg .greyscale-svg #oriya-pstf-yya}

Post-base consonant Yya
:::

```{svg-color-toggle-button} oriya-pstf-yya
```


Oriya includes one consonant that can take on a special below-base form:

  - <samp>"Halant,Ra"</samp> (in a syllable-final position) takes on the <samp>"Raphala"</samp>
    form. 

:::{figure-md}
![Raphala composition](/images/oriya/oriya-blwf-ra.svg "Raphala composition"){.shaping-demo .inline-svg .greyscale-svg #oriya-blwf-ra}

Raphala composition
:::

```{svg-color-toggle-button} oriya-blwf-ra
```


<!---In addition, all consonants in Oriya can take on subjoined forms.--->

> Note: Because Oriya employs the `BLWF_MODE_PRE_AND_POST` shaping
> characteristic, consonants with below-base special forms may occur
> before or after the syllable base. 
> 
> During the base-consonant search, only the <samp>"Halant,_consonant_"</samp> 
> pattern following the syllable base for these below-base forms will
> be encountered. Stage 2, step 5 below ensures that the <samp>"_consonant_,Halant"</samp>
> pattern preceding the syllable base for these below-base forms will
> also be tagged correctly.


#### Stage 2, step 2: Matra decomposition ####

Second, any multi-part dependent vowels (matras) must be decomposed
into their individual components. Oriya has three
multi-part dependent vowels, "Ai" (`U+0B48`), "O" (`U+0B4B`), and "Au" (`U+0B4C`). Each
has a canonical decomposition, so this step is unambiguous. 

> "Ai" (`U+0B48`) decomposes to "`U+0B47`,`U+0B56`"
>
> "O" (`U+0B4B`) decomposes to "`U+0B47`,`U+0B3E`"
>
> "Au" (`U+0B4C`) decomposes to "`U+0B47`,`U+0B57`"

> Note: "Au Length Mark" (`U+0B57`) is categorized in Unicode as being a
> top-and-right matra, a combination that would normally decompose
> into one TOP_POSITION mark and one RIGHT_POSITION mark
> (`U+0B3E`,`U+0B56`). In "Au Length Mark", however, the `U+0B3E`
> component is intended to be positioned over the `U+0B56` component,
> not above the base.
>
> Consequently, the two decomposed components should both be tagged
> for the `POS_AFTER_POST` sorting position, and neither will need to
> be reordered.
>
> In addition, the decomposition is not canonical in
> Unicode, so performing the decomposition may trigger unknown
> behavior from other components of the software stack. Consequently,
> shaping engines may choose to skip it. 

Because this decomposition is a character-level operation, the shaping
engine may choose to perform it earlier, such as during an initial
Unicode-normalization stage. However, all such decompositions must be
completed before the shaping engine begins step three, below.

:::{figure-md}
![Two-part matra decomposition](/images/oriya/oriya-matra-decompose.svg "Two-part matra decomposition"){.shaping-demo .inline-svg .greyscale-svg #oriya-matra-decompose}

Two-part matra decomposition
:::

```{svg-color-toggle-button} oriya-matra-decompose
```


#### Stage 2, step 3: Tag matras ####

Third, all left-side dependent-vowel (matra) signs, including those that
resulted from the preceding decomposition step, must be tagged to be
moved to the beginning of the syllable, with `POS_PREBASE_MATRA`.

All above-base dependent-vowel (matra) signs are tagged `POS_AFTER_MAIN`.

All right-side dependent-vowel (matra) signs are tagged
`POS_AFTER_POST`.

All below-base dependent-vowel (matra) signs are tagged
`POS_AFTER_SUBJOINED`.

For simplicity, shaping engines may choose to tag single-part matras
in an earlier text-processing step, using the information in the
_Mark-placement subclass_ column of the character tables. It is
critical at this step, however, that all decomposed matras are also
correctly tagged before proceeding to the next step.

#### Stage 2, step 4: Adjacent marks ####

Fourth, any subsequences of marks that include a <samp>"Nukta"</samp> and a
<samp>"Halant"</samp> or Vedic sign must be reordered so that the <samp>"Nukta"</samp> appears
first.

This means that the subsequence <samp>"Halant,Nukta"</samp> is reordered to
<samp>"Nukta,Halant"</samp> and that the subsequence <samp>"_Vedic_sign_,Nukta"</samp> is
reordered to <samp>"Nukta,_Vedic_sign_"</samp>.

For subsequences of affected marks that are longer than two, the
reordering operation must be repeated until the <samp>"Nukta"</samp> is the first
character in the subsequence. No other marks in the subsequence
should be reordered.

This order is canonical in Unicode and is required so that
<samp>"_consonant_,Nukta"</samp> substitution rules from <abbr title="Glyph Substitution table">GSUB</abbr> will be correctly
matched later in the shaping process.

#### Stage 2, step 5: Pre-base consonants ####

Fifth, consonants that occur before the syllable base must be tagged
with `POS_PREBASE_CONSONANT`. Excluding initial <samp>"Ra,Halant"</samp> sequences
that will become <samp>"Reph"</samp>s: 

  - If the consonant has a below-base form, tag it as
          `POS_BELOWBASE_CONSONANT`. 
  - Otherwise, tag it as `POS_PREBASE_CONSONANT`.
  
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.

Oriya includes one consonant that can take on a special below-base form:

  - <samp>"Halant,Ra"</samp> (in a non-initial position) takes on the <samp>"Raphala"</samp>
    form. 

:::{figure-md}
![Raphala composition](/images/oriya/oriya-blwf-ra-1.svg "Raphala composition"){.shaping-demo .inline-svg .greyscale-svg #oriya-blwf-ra-1}

Raphala composition
:::

```{svg-color-toggle-button} oriya-blwf-ra-1
```


> Note: Because Oriya employs the `BLWF_MODE_PRE_AND_POST` shaping
> characteristic, consonants with below-base special forms may occur
> before or after the syllable base. 
> 
> During the base-consonant search in stage 2, step 1, any instances of the
> <samp>"Halant,_consonant_"</samp>  pattern following the syllable base for these
> below-base forms will be encountered. The tagging in this step
> ensures that the <samp>"_consonant_,Halant"</samp> pattern preceding the syllable
> base for these below-base forms will also be tagged correctly.


#### Stage 2, step 6: Reph ####

Sixth, initial <samp>"Ra,Halant"</samp> sequences that will become <samp>"Reph"</samp>s must be tagged with
`POS_RA_TO_BECOME_REPH`.

> Note: An initial <samp>"Ra,Halant"</samp> sequence will always become a <samp>"Reph"</samp>
> unless the <samp>"Ra"</samp> is the only consonant in the syllable.

#### Stage 2, step 7: Final consonants ####

Seventh, all final consonants must be tagged. Consonants that occur
after the syllable base _and_ after a dependent vowel (matra) sign
must be tagged with  `POS_FINAL_CONSONANT`.

> Note: Final consonants occur only in Sinhala and should not be
> expected in `<ory2>` text runs. This step is included here to
> maintain compatibility across Indic scripts.


#### Stage 2, step 8: Mark tagging ####

Eighth, all marks must be tagged. 

> Note: In this step, joiner and non-joiner characters must also be
> tagged according to the same rules given for marks, even though
> these characters are not categorized as marks in Unicode.

Marks in the `BINDU`, `VISARGA`, `AVAGRAHA`, `CANTILLATION`,
`SYLLABLE_MODIFIER`, `GEMINATION_MARK`, and `SYMBOL` categories should
be tagged with `POS_SMVD`. 

Oriya includes one exception to the above general rule. The
<samp>"Candrabindu"</samp> (`U+0B01`) must be tagged with `POS_BEFORE_SUBJOINED`.

All <samp>"Nukta"</samp>s must be tagged with the same positioning tag as the
preceding consonant, independent vowel, placeholder, or dotted circle.

All remaining marks (not in the `POS_SMVD` category and not <samp>"Nukta"</samp>s)
must be tagged with the same positioning tag as the closest non-mark
character the mark has affinity with, so that they move together 
during the sorting step.

There are two possible cases: those marks before the syllable base
and those marks after the syllable base. In addition, an exception is
made for <samp>"Halant"</samp> marks that follow a left-side (pre-base) matra.

  1. Initially, all remaining marks should be tagged with the same
	 positioning tag as the closest preceding consonant.

  2. For each consonant after the syllable base (such as post-base
	 consonants, below-base consonants, or final consonants), all
	 remaining marks located between that current consonant and any
	 previous consonant should be tagged with the same positioning tag as
	 the current (later) consonant.
  
     In other words, all consonants preceding the syllable base "own" the
	 marks that follow them, while all consonants after the syllable base
	 "own" the marks that come before them. When a syllable does not have
	 any consonants after the syllable base, the syllable base should
	 "own" all the marks that follow it.
  
  3. Finally, <samp>"Halant"</samp> marks that follow a left-side dependent vowel
     (matra) should _not_ be tagged with the left-side matra's
     positioning tag. Instead, the <samp>"Halant"</samp> should be tagged with the
     positioning tag of the non-mark character preceding the left-side
     matra. This prevents the <samp>"Halant"</samp> mark from being moved with the
     left-side matra when the syllable is sorted.


<!--- HarfBuzz also tags everything between a post-base consonant or -->
<!--matra and another post-base consonant as belonging to the latter -->
<!--post-base consonant. --->


#### Stage 2, step 9: Sort syllable ####

With these steps completed, the syllable can be sorted into the final
sort order as listed at the beginning of stage 2.

The glyphs in the syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.


#### Stage 2, step 10: Flag sequences for possible feature applications ####

With the initial reordering complete, those glyphs in the syllable that
may have <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features applied in stages 3, 5, and 6 should be
flagged for each potential feature. 

This flagging is preliminary; the set of potential features varies
between different scripts and which features are supported varies
between fonts. It is also possible that the application of
one feature on a glyph sequence will perform a substitution that makes
a later feature no longer applicable to the updated sequence.

Consequently, the flagging must be completed before shaping proceeds
to the stages during which features are applied.

Some shaping features, such as `locl`, can potentially apply to any
glyphs. Therefore it is not necessary to maintain a separate flag for
these features in the bitmask (or other data structure) used to track
the flags -- although shaping engines may do so if desired.

The sequences to flag are summarized in the list below; a full
description of each feature's function and interpretation is provided
in the <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> application stages that follow.

  - `nukt` should match <samp>"_Consonant_,Nukta"</samp> sequences
  - `akhn` should match <samp>"Ka,Halant,Ssa"</samp> and <samp>"Ja,Halant,Nya"</samp>
  - `rphf` should match initial <samp>"Ra,Halant"</samp> sequences but _not_ match
            initial <samp>"Ra,Halant,ZWJ"</samp> sequences
  - `blwf` should match <samp>"Halant,_Consonant_"</samp> in post-base positions and
            <samp>"Ra,Halant"</samp> in non-initial pre-base positions
  - `half` should match <samp>"_Consonant_,Halant"</samp> in pre-base position but
           _not_ match <samp>"Ra,Halant"</samp> sequences flagged for `rphf` and
           _not_ match <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequences
  - `pstf` should match <samp>"Halant,Ya"</samp> and <samp>"Halant,Yya"</samp> in post-base position
  - `cjct` should match <samp>"_Consonant_,Halant,_Consonant_"</samp> but _not_
            match <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
            <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp>


### Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should be flagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2, step 10.

The order in which these substitutions must be performed is fixed for
all Indic scripts:

	locl
	nukt
	akhn
	rphf 
	rkrf (not used in Oriya)
	pref (not used in Oriya)
	blwf 
	abvf (not used in Oriya)
	half
	pstf
	vatu (not used in Oriya)
	cjct
	cfar (not used in Oriya)

#### Stage 3, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

#### Stage 3, step 2: nukt ####

The `nukt` feature replaces <samp>"_Consonant_,Nukta"</samp> sequences with a
precomposed nukta-variant of the consonant glyph. 

  - The context defined for a `nukt` feature is:

:::{table} `nukt` feature context
    
| Backtrack     | Matching sequence             | Lookahead     |
|:--------------|:------------------------------|:--------------|
| _none_        | `_consonant_`(full),`_nukta_` | _none_        |
:::


:::{figure-md}
![Nukta composition](/images/oriya/oriya-nukt.svg "Nukta composition"){.shaping-demo .inline-svg .greyscale-svg #oriya-nukt}

Nukta composition
:::

```{svg-color-toggle-button} oriya-nukt
```


#### Stage 3, step 3: akhn ####

The `akhn` feature replaces two specific sequences with required ligatures. 

  - <samp>"Ka,Halant,Ssa"</samp> is substituted with the <samp>"KSsa"</samp> ligature. 
  - <samp>"Ja,Halant,Nya"</samp> is substituted with the <samp>"JNya"</samp> ligature. 
  
These sequences can occur anywhere in a syllable. The <samp>"KSsa"</samp> and
<samp>"JNya"</samp> characters have orthographic status equivalent to full
consonants in some languages, and fonts may have `cjct` substitution
rules designed to match them in subsequences. Therefore, this
feature must be applied before all other many-to-one substitutions.

  - The context defined for an `akhn` feature is:

:::{table} `akhn` feature context
    
| Backtrack     | Matching sequence           | Lookahead     |
|:--------------|:----------------------------|:--------------|
| _none_        | `AKHAND_CONSONANT_SEQUENCE` | _none_        |
:::


:::{figure-md}
![KSsa ligation](/images/oriya/oriya-akhn-kssa.svg "KSsa ligation"){.shaping-demo .inline-svg .greyscale-svg #oriya-akhn-kssa}

KSsa ligation
:::

```{svg-color-toggle-button} oriya-akhn-kssa
```


:::{figure-md}
![JNya ligation](/images/oriya/oriya-akhn-jnya.svg "JNya ligation"){.shaping-demo .inline-svg .greyscale-svg #oriya-akhn-jnya}

JNya ligation
:::

```{svg-color-toggle-button} oriya-akhn-jnya
```


#### Stage 3, step 4: rphf ####

The `rphf` feature replaces initial <samp>"Ra,Halant"</samp> sequences with the
<samp>"Reph"</samp> glyph.

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence, however, must not be flagged for
    the `rphf` substitution.
	

  - The context defined for a `rphf` feature is:

:::{table} `rphf` feature context
    
| Backtrack        | Matching sequence       | Lookahead     |
|:-----------------|:------------------------|:--------------|
| `SYLLABLE_START` | "Ra"(full),`_halant_`   | _none_        |
:::


:::{figure-md}
![Reph composition](/images/oriya/oriya-rphf.svg "Reph composition"){.shaping-demo .inline-svg .greyscale-svg #oriya-rphf}

Reph composition
:::

```{svg-color-toggle-button} oriya-rphf
```


#### Stage 3, step 5: rkrf ####

> This feature is not used in Oriya.

#### Stage 3, step 6: pref ####

> This feature is not used in Oriya.

#### Stage 3, step 7: blwf ####

The `blwf` feature replaces below-base-consonant glyphs with any
special forms. Oriya includes one special below-base consonant
form:

  - <samp>"Halant,Ra"</samp> (in a non-initial position) takes on the <samp>"Raphala"</samp>
    form. 

:::{figure-md}
![Raphala composition](/images/oriya/oriya-blwf-ra-2.svg "Raphala composition"){.shaping-demo .inline-svg .greyscale-svg #oriya-blwf-ra-2}

Raphala composition
:::

```{svg-color-toggle-button} oriya-blwf-ra-2
```


<!---In addition, all consonants in Oriya can take on subjoined forms.--->

Because Oriya incorporates the `BLWF_MODE_PRE_AND_POST` shaping
characteristic, any pre-base consonants and any post-base consonants
may potentially match a `blwf` substitution; therefore, both cases must
be flagged for comparison. Note that this is not necessarily the case in other
Indic scripts that use a different `BLWF_MODE_` shaping
characteristic. 

  - The context defined for a `blwf` feature is:

:::{table} `blwf` feature context
    
| Backtrack     | Matching sequence        | Lookahead     |
|:--------------|:-------------------------|:--------------|
| `_consonant_` | `_halant_`,`_consonant_` | _none_        |
:::


:::{figure-md}
![Below-base consonant composition](/images/oriya/oriya-blwf.svg "Below-base consonant composition"){.shaping-demo .inline-svg .greyscale-svg #oriya-blwf}

Below-base consonant composition
:::

```{svg-color-toggle-button} oriya-blwf
```


#### Stage 3, step 8: abvf ####

> This feature is not used in Oriya.

#### Stage 3, step 9: half ####

The `half` feature replaces <samp>"_Consonant_,Halant"</samp> sequences before the
base consonant or syllable base with "half forms" of the consonant
glyphs.

In the most common case, this substitution applies to
<samp>"_Consonant_,Halant"</samp> sequences that are followed by another
_Consonant_.

In addition, a sequence matching <samp>"_Consonant_,Halant,ZWJ"</samp> must also be
flagged for potential `half` substitutions.

> Note: The presence of the <samp>"ZWJ"</samp> at the end of the sequence means
> that the sequence may match the regular-expression test in stage 1
> as the end of a syllable, even without being followed by a base
> consonant or syllable base.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.

There are two exceptions to the default behavior, for which the
shaping engine must test:

  - Initial <samp>"Ra,Halant"</samp> sequences, which should have been flagged for
    the `rphf` feature earlier, must not be flagged for potential
    `half` substitutions.

  - A sequence matching <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be
    flagged for potential `half` substitutions.


#### Stage 3, step 10: pstf ####

The `pstf` feature replaces post-base-consonant glyphs with any special forms.


:::{figure-md}
![Post-base form Ya composition](/images/oriya/oriya-pstf-ya-1.svg "Post-base form Ya composition"){.shaping-demo .inline-svg .greyscale-svg #oriya-pstf-ya-1}

Post-base form Ya composition
:::

```{svg-color-toggle-button} oriya-pstf-ya-1
```


:::{figure-md}
![Post-base form Yya composition](/images/oriya/oriya-pstf-yya-1.svg "Post-base form Yya composition"){.shaping-demo .inline-svg .greyscale-svg #oriya-pstf-yya-1}

Post-base form Yya composition
:::

```{svg-color-toggle-button} oriya-pstf-yya-1
```


#### Stage 3, step 11: vatu ####

> This feature is not used in Oriya.


#### Stage 3, step 12: cjct ####

The `cjct` feature replaces sequences of adjacent consonants with
conjunct ligatures. These sequences must match <samp>"_Consonant_,Halant,_Consonant_"</samp>.

A sequence matching <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
<samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be flagged to form a conjunct.

> Note: The presence of the <samp>"ZWJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> sequence should automatically
> inhibit any `cjct` feature rules from matching the sequence as valid
> input, and thus prevent the `cjct` substitution from being applied.

> Note: The presence of the <samp>"ZWNJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequence means that the
> <samp>"_Consonant_,Halant,ZWNJ"</samp> subsequence will match the
> regular-expression test in stage 1 as the end of a syllable.
> 
> Because OpenType shaping features in `<ory2>` are defined as
> applying only within an individual syllable, this means that the
> presence of the <samp>"ZWNJ"</samp> will automatically prevent the application of
> a `cjct` feature by triggering the identification of a syllable
> break between the two consonants.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWNJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.
>
> Note, also: The presence of the <samp>"ZWJ"</samp> means that a
> <samp>"_Consonant_,Halant,ZWJ"</samp> sequence may match the regular-expression
> test in stage 1 as the end of a syllable, even without being
> followed by a base consonant or syllable base. By definition,
> however, a <samp>"_Consonant_,Halant,ZWJ"</samp> syllable identified in stage 1
> cannot also include a <samp>"_Consonant_"</samp> after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.

The font's <abbr title="Glyph Substitution table">GSUB</abbr> rules might be implemented so that `cjct`
substitutions apply to half-form consonants; therefore, this feature
must be applied after the `half` feature. 


:::{figure-md}
![Conjunct ligation](/images/oriya/oriya-cjct.svg "Conjunct ligation"){.shaping-demo .inline-svg .greyscale-svg #oriya-cjct}

Conjunct ligation
:::

```{svg-color-toggle-button} oriya-cjct
```


#### Stage 3, step 13: cfar ####

> This feature is not used in Oriya.


### Stage 4: Final reordering ###

The final reordering stage repositions marks, dependent-vowel (matra)
signs, and <samp>"Reph"</samp> glyphs to the appropriate location with respect to
the base consonant or syllable base. Because multiple substitutions
may have occurred during the application of the basic-shaping features
in the preceding stage, these repositioning moves could not be
performed during the initial reordering stage.

Like the initial reordering stage, the steps involved in this stage
occur on a per-syllable basis.

<!--- Check that classifications have not been mangled. If the -->
<!--character is a Halant AND a ligature was formed AND a multiple
substitution was performed, restore the classification to VIRAMA
because it was almost certainly lost in the preceding <abbr title="Glyph Substitution table">GSUB</abbr> stage.
--->

#### Stage 4, step 1: Base consonant ####

The final reordering stage, like the initial reordering stage, begins
with determining the syllable base of each syllable, following the
same algorithm used in stage 2, step 1.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base. In a standalone sequence or
other syllable that begins with a placeholder or a dotted circle, the
placeholder or dotted circle will always serve as the syllable base.

In a syllable that begins with a consonant, the shaping engine must
repeat the base-consonant search algorithm used in stage 2, step 1.

The codepoint of the underlying base consonant or syllable base will
not change between the search performed in stage 2, step 1, and the
search repeated here. However, the application of <abbr title="Glyph Substitution table">GSUB</abbr> shaping
features in stage 3 means that several ligation and many-to-one
substitutions may have taken place. The final glyph produced by that
process may, therefore, be a conjunct or ligature form — in most
cases, such a glyph will not have an assigned Unicode codepoint.
   
#### Stage 4, step 2: Pre-base matras ####

Pre-base dependent vowels (matras) that were reordered during the
initial reordering stage must be moved to their final position. This
position is defined as:
   
   - After the last standalone <samp>"Halant"</samp> glyph that comes after the
     matra's starting position and also comes before the main
     consonant.
   - If a zero-width joiner follows this last standalone <samp>"Halant"</samp>, the
     final matra position is moved to after the joiner.

This means that the matra will move to the right of all explicit
<samp>"consonant,Halant"</samp> subsequences, but will stop to the left of the base
consonant or syllable base, all conjuncts or ligatures that contain
the base consonant or syllable base, and all half forms.

:::{figure-md}
![Pre-base matra position](/images/oriya/oriya-matra-position.svg "Pre-base matra position"){.shaping-demo .inline-svg .greyscale-svg #oriya-matra-position}

Pre-base matra position
:::

```{svg-color-toggle-button} oriya-matra-position
```


> Note: OpenType and Unicode both state that if the syllable includes
> a <abbr title="Zero-Width Joiner">ZWJ</abbr> immediately after the last <samp>"Halant"</samp>, then the final matra
> position should be after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.
>
> However, there are several test sequences indicating that
> Microsoft's Uniscribe shaping engine did not follow this rule (in,
> at least, Devanagari and Bengali text), and in these circumstances
> Uniscribe instead makes the final matra position before the final
> <samp>"Consonant,Halant,ZWJ"</samp>.
>
> Subsequently, the HarfBuzz shaping engine has also followed the same
> pattern. If other shaping engine implementations prefer to maintain
> maximum compatibility with Uniscribe and HarfBuzz, then they should
> also follow suit.

> Note: The Microsoft script-development specifications for OpenType
> shaping also state that if a zero-width non-joiner follows the last
> standalone <samp>"Halant"</samp>, the final matra position is moved to after the
> non-joiner. However, it is unnecessary to test for this condition,
> because a <samp>"Halant,ZWNJ"</samp> subsequence is, by definition, the end of a
> syllable. Consequently, a <samp>"Halant,ZWNJ"</samp> cannot be followed by a
> pre-base dependent vowel.


#### Stage 4, step 3: Reph ####

<samp>"Reph"</samp> must be moved from the beginning of the syllable to its final
position. Because Oriya incorporates the `REPH_POS_AFTER_MAIN`
shaping characteristic, this final position is immediately after the
syllable base.

The algorithm for finding the final <samp>"Reph"</samp> position is:

  - Move the <samp>"Reph"</samp> to the position immediately before
    the first post-base matra, syllable modifier, or Vedic sign that
    has a positioning tag after the script's <samp>"Reph"</samp> position in the
    syllable sort order (as listed in [stage
    2](#stage-2-initial-reordering)). This will be the final <samp>"Reph"</samp>
    position. 
	> Note: Because Oriya incorporates the
    > `REPH_POS_AFTER_MAIN` shaping characteristic, this means
    > any positioning tag of `POS_ABOVEBASE_CONSONANT` or later,
    > although a post-base matra, syllable modifier, or Vedic sign
    > would not typically be tagged with `POS_ABOVEBASE_CONSONANT`.
  - If no other location has been located in the previous step, move
    the <samp>"Reph"</samp> to the end of the syllable.

Finally, if the final position of <samp>"Reph"</samp> or <samp>"Repha"</samp> occurs after a
<samp>"_matra_,Halant"</samp> subsequence, then <samp>"Reph"</samp>/<samp>"Repha"</samp> must be repositioned to the
left of <samp>"Halant"</samp>, to allow for potential matching with `abvs` or
`psts` substitutions from <abbr title="Glyph Substitution table">GSUB</abbr>.

:::{figure-md}
![Reph position](/images/oriya/oriya-reph-position.svg "Reph position"){.shaping-demo .inline-svg .greyscale-svg #oriya-reph-position}

Reph position
:::

```{svg-color-toggle-button} oriya-reph-position
```


#### Stage 4, step 4: Pre-base-reordering consonants ####

Any pre-base-reordering consonants must be moved to immediately before
the base consonant or syllable base.
  
Oriya does not use pre-base-reordering consonants, so this step will
involve no work when processing `<ory2>` text. It is included here in order
to maintain compatibility with the other Indic scripts.


#### Stage 4, step 5: Initial matras ####

Any left-side dependent vowels (matras) that are at the start of a
word must be flagged for potential substitution by the `init` feature
of <abbr title="Glyph Substitution table">GSUB</abbr>.

Oriya does not use the `init` feature, so this step will
involve no work when processing `<ory2>` text. It is included here in
order to maintain compatibility with the other Indic scripts.


### Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr> ###

In this stage, the remaining substitution features from the <abbr title="Glyph Substitution table">GSUB</abbr> table
are applied. In preparation for this stage, glyph sequences should be
flagged for possible application of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2,
step 10.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	init (not used in Oriya)
	pres
	abvs
	blws
	psts
	haln

The `init` feature is not used in Oriya.

The `pres` feature replaces pre-base-consonant glyphs with special
presentation forms. This can include consonant conjuncts, half-form
consonants, and stylistic variants of left-side dependent vowels
(matras). 

:::{figure-md}
![Pre-base form substitution](/images/oriya/oriya-pres.svg "Pre-base form substitution"){.shaping-demo .inline-svg .greyscale-svg #oriya-pres}

Pre-base form substitution
:::

```{svg-color-toggle-button} oriya-pres
```


The `abvs` feature replaces above-base-consonant glyphs with special
presentation forms. This usually includes contextual variants of
above-base marks or contextually appropriate mark-and-base ligatures.

:::{figure-md}
![Above-base form substitution](/images/oriya/oriya-abvs.svg "Above-base form substitution"){.shaping-demo .inline-svg .greyscale-svg #oriya-abvs}

Above-base form substitution
:::

```{svg-color-toggle-button} oriya-abvs
```


The `blws` feature replaces below-base-consonant glyphs with special
presentation forms. This usually includes replacing base consonants or
syllable bases that
are adjacent to below-base-consonant forms like <samp>"Raphala"</samp> with
contextual ligatures.

:::{figure-md}
![Below-base form substitution](/images/oriya/oriya-blws.svg "Below-base form substitution"){.shaping-demo .inline-svg .greyscale-svg #oriya-blws}

Below-base form substitution
:::

```{svg-color-toggle-button} oriya-blws
```


The `psts` feature replaces post-base-consonant glyphs with special
presentation forms. This usually includes replacing right-side
dependent vowels (matras) with stylistic variants or replacing
post-base-consonant/matra pairs with contextual ligatures. 

:::{figure-md}
![Post-base form substitution](/images/oriya/oriya-psts.svg "Post-base form substitution"){.shaping-demo .inline-svg .greyscale-svg #oriya-psts}

Post-base form substitution
:::

```{svg-color-toggle-button} oriya-psts
```


The `haln` feature replaces syllable-final <samp>"_Consonant_,Halant"</samp> pairs with
special presentation forms. This can include stylistic variants of the
consonant where placing the <samp>"Halant"</samp> mark on its own is
typographically problematic. 

:::{figure-md}
![Halant form substitution](/images/oriya/oriya-haln.svg "Halant form substitution"){.shaping-demo .inline-svg .greyscale-svg #oriya-haln}

Halant form substitution
:::

```{svg-color-toggle-button} oriya-haln
```

> Note: The `calt` feature, which allows for generalized application
> of contextual alternate substitutions, is usually applied at this
> point. However, `calt` is not mandatory for correct Oriya shaping
> and may be disabled in the application by user preference.


### Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied.

As with the preceding stage, the order in which these features are
applied is not canonical; they should be applied in the order in which
they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

        dist
        abvm
        blwm

> Note: The `kern` feature is usually applied at this stage, if it is
> present in the font. However, `kern` (like `calt`, above) is not
> mandatory for shaping Oriya text and may be disabled by user preference.

The `dist` feature adjusts the horizontal positioning of
glyphs. Unlike `kern`, adjustments made with `dist` do not require the
application or the user to enable any software _kerning_ features, if
such features are optional. 

:::{figure-md}
![Distance positioning](/images/oriya/oriya-dist.svg "Distance positioning"){.shaping-demo .inline-svg .greyscale-svg #oriya-dist}

Distance positioning
:::

```{svg-color-toggle-button} oriya-dist
```


The `abvm` feature positions above-base marks for attachment to base
characters. In Oriya, this includes <samp>"Reph"</samp> in addition to the
above-base dependent vowels (matras), diacritical marks and Vedic signs. 

:::{figure-md}
![Above-base mark position](/images/oriya/oriya-abvm.svg "Above-base mark position"){.shaping-demo .inline-svg .greyscale-svg #oriya-abvm}

Above-base mark position
:::

```{svg-color-toggle-button} oriya-abvm
```


The `blwm` feature positions below-base marks for attachment to base
characters. In Oriya, this includes below-base dependent vowels
(matras) as well as the below-base consonant form <samp>"Raphala"</samp>.

:::{figure-md}
![Below-base mark position](/images/oriya/oriya-blwm.svg "Below-base mark position"){.shaping-demo .inline-svg .greyscale-svg #oriya-blwm}

Below-base mark position
:::

```{svg-color-toggle-button} oriya-blwm
```


## The `<orya>` shaping model ##

The older Oriya script tag, `<orya>`, has been deprecated. However,
shaping engines may still encounter fonts that were built to work with
`<orya>` and some users may still have documents that were written to
take advantage of `<orya>` shaping.

### Distinctions from `<ory2>` ###

The most significant distinction between the shaping models is that the
sequence of <samp>"Halant"</samp> and consonant glyphs used to trigger shaping
features was altered when migrating from `<orya>` to
`<ory2>`. 

Specifically, under `<orya>`, shaping engines were expected to reorder
post-base <samp>"Halant,_Consonant_"</samp> sequences to
<samp>"_Consonant_,Halant"</samp>.

As a result, a font's <abbr title="Glyph Substitution table">GSUB</abbr> substitutions would be written to match
<samp>"_Consonant_,Halant"</samp> sequences in all pre-base and post-base positions.


The `<orya>` syllable

	Pre-baseC Halant BaseC Halant Post-baseC

would be reordered to

	Pre-baseC Halant BaseC Post-baseC Halant

before features are applied.

In `<ory2>` text, as described above in this document, there is no
such reordering. The correct sequence to match for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions is
<samp>"_Consonant_,Halant"</samp> for pre-base consonants, but <samp>"Halant,_Consonant_"</samp>
for post-base consonants.

The old Indic shaping model also did not recognize the
`BLWF_MODE_PRE_AND_POST` shaping characteristic. Instead, `<orya>`
was treated as if it followed the `BLWF_MODE_POST_ONLY`
characteristic. In other words, below-base form substitutions were
only applied to consonants after the base consonant or syllable base.

In addition, for some scripts, left-side dependent vowel marks
(matras) were not repositioned during the final reordering
stage. For `<orya>` text, the left-side matra was always positioned
at the beginning of the syllable.


### Advice for handling fonts with `<orya>` features only ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences in order to apply <abbr title="Glyph Substitution table">GSUB</abbr> substitutions when it is known that
the font in use supports only the `<orya>` shaping model.

### Advice for handling text runs composed in `<orya>` format ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions or to reorder them to
<samp>"Halant,_Consonant_"</samp> when processing text runs that are tagged with
the `<orya>` script tag and it is known that the font in use supports
only the `<ory2>` shaping model.

Shaping engines may also choose to apply `blwf` substitutions to
below-base consonants occurring before the base consonant or syllable base when it is
known that the font in use supports an applicable substitution lookup.

Shaping engines may also choose to position left-side matras according
to the `<orya>` ordering scheme; however, doing so might interfere
with matching <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features.


================================================
FILE: opentype-shaping-sinhala.md
================================================
```{include} /_global.md
```

# Sinhala shaping in OpenType #

This document details the shaping procedure needed to display text
runs in the Sinhala script.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes and subclasses](#shaping-classes-and-subclasses)
      - [Sinhala character tables](#sinhala-character-tables)
  - [The `<sinh>` shaping model](#the-sinh-shaping-model)
      - [Stage 1: Identifying syllables and other sequences](#stage-1-identifying-syllables-and-other-sequences)
      - [Stage 2: Initial reordering](#stage-2-initial-reordering)
      - [Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr>](#stage-3-applying-the-basic-substitution-features-from-gsub)
      - [Stage 4: Final reordering](#stage-4-final-reordering)
      - [Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr>](#stage-5-applying-all-remaining-substitution-features-from-gsub)
      - [Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr>](#stage-6-applying-remaining-positioning-features-from-gpos)

## General information ##

The Sinhala script belongs to the Indic family, and follows
the same general patterns as the other Indic scripts. More
specifically, it belongs to the South Indic subgroup.

The Sinhala script is used to write multiple languages, most commonly
Sinhalese and Pali. In addition, Sanskrit may be written
in Sinhala, so Sinhala script runs may include glyphs from the Vedic
Extensions block of Unicode. 

Unlike many other Indic scripts, Sinhala has only one extant script
tag defined in OpenType, `<sinh>`.


## Terminology ##

OpenType shaping uses a standard set of terms for Indic scripts.  The
terms used colloquially in any particular language may vary, however,
potentially causing confusion.

**Halant** and **Virama** are both standard terms for the below-base "vowel-killer"
sign. Unicode documents use the term "virama" most frequently, while
OpenType documents use the term "halant" most frequently. In the
Sinhalese language, this sign is known as the _al-lakuna_ or _hal kirīma_.

**Chandrabindu** (or simply **Bindu**) is the standard term for the diacritical mark
indicating that the preceding vowel should be nasalized. 

The term **base consonant** is also critical to Indic shaping. The
base consonant of a syllable is the consonant that carries the
syllable's vowel sound, either the inherent vowel (for an unmarked
base consonant) or a dependent vowel (with the addition of a matra).

A syllable's base consonant is generally rendered in its full form
(although it may form ligatures), while other consonants in the
syllable frequently take on secondary forms. Different <abbr title="Glyph Substitution table">GSUB</abbr>
substitutions may apply to a script's **pre-base** and **post-base**
consonants. Some of these substitutions create **above-base** or
**below-base** forms. The **Reph** form of the consonant "Ra" is an
example. In the Sinhalese language, the Reph form is known as _repaya_.

Syllables may also begin with an **independent vowel** instead of a
consonant. In these syllables, the independent vowel is rendered in
full-letter form, not as a matra, and the independent vowel serves as the
syllable base, similar to a base consonant.

Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.

## Glyph classification ##

Shaping Sinhala text depends on the shaping engine correctly
classifying each glyph in the run. As with most other scripts, the
classifications must distinguish between consonants, vowels
(independent and dependent), numerals, punctuation, and various types
of diacritical mark.

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not sufficient to fully capture the
expected shaping behavior (such as glyph reordering). Therefore,
Sinhala glyphs must additionally be classified by how they are treated
when shaping a run of text.

### Shaping classes and subclasses ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Indic scripts. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Several of the diacritic and syllable-modifying marks behave according
to their own rules and, thus, have a special class. These include
`BINDU`, `VISARGA`, `AVAGRAHA`, `NUKTA`, and `VIRAMA`. Some
less-common marks behave according to rules that are similar to these
common marks, and are therefore classified with the corresponding
common mark. The Vedic Extensions also include a `CANTILLATION`
class for tone marks.

Letters generally fall into the classes `CONSONANT`,
`VOWEL_INDEPENDENT`, and `VOWEL_DEPENDENT`. These classes help the
shaping engine parse and identify key positions in a syllable. For
example, Unicode categorizes dependent vowels as `Mark [Mn]`, but the
shaping engine must be able to distinguish between dependent vowels
and diacritical marks (which are also categorized as `Mark [Mn]`).

Other characters, such as symbols and miscellaneous letters (for
example, letter-like symbols that only occur as standalone entities
and do not occur within syllables), need no special attention from the
shaping engine, so they are not assigned a shaping class.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType features that
might affect how the respective glyphs are drawn, such as `tnum`,
which specifies the usage of tabular-width numerals, and `sups`, which
replaces the default glyphs with superscript variants.

Marks and dependent vowels are further labeled with a mark-placement
subclass, which indicates where the glyph will be placed with respect
to the base character to which it is attached. The actual position of
the glyphs is determined by the lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr>
table; however, the shaping rules for Indic scripts require that the
shaping engine be able to identify marks by their general
position. 

For example, left-side dependent vowels (matras), classified
with `LEFT_POSITION`, must frequently be reordered, with the final
position determined by whether or not other letters in the syllable
have formed ligatures or combined into conjunct forms. Therefore, the
`LEFT_POSITION` subclass of the character must be tracked throughout
the shaping process.

There are four basic _mark-placement subclasses_ for dependent vowels
(matras). Each corresponds to the visual position of the matra with
respect to the syllable base to which it is attached:

  - `LEFT_POSITION` matras are positioned to the left of the syllable base.
  - `RIGHT_POSITION` matras are positioned to the right of the syllable base.
  - `TOP_POSITION` matras are positioned above the syllable base.
  - `BOTTOM_POSITION` matras are positioned below the syllable base.
  
These positions may also be referred to elsewhere in shaping documents as:

  - _Pre-base_ matras
  - _Post-base_ matras
  - _Above-base_ matras
  - _Below-base_ matras
  
respectively. The `LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

In addition, dependent-vowel codepoints that are composed of multiple
components will be designated in character tables as having a compound
_mark-placement subclass_, such as `TOP_AND_RIGHT` or `LEFT_AND_RIGHT`. 

However, these multi-part matras are decomposed into separate matra
components during the shaping process. After the decomposition, each
matra component will belong to exactly one of the four basic
_mark-placement subclasses_.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there are some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 

### Sinhala character tables ###

Separate character tables are provided for the Sinhala, Sinhala
Archaic Numbers, and Vedic Extensions blocks as well as for other
miscellaneous characters that are used in `<sinh>` text runs:

  - [Sinhala character table](character-tables/character-tables-sinhala.md#sinhala-character-table)
  - [Sinhala Archaic Numbers character table](character-tables/character-tables-sinhala.md#sinhala-archaic-numbers-character-table)
  - [Vedic Extensions character table](character-tables/character-tables-sinhala.md#vedic-extensions-character-table)
  - [Miscellaneous character table](character-tables/character-tables-sinhala.md#miscellaneous-character-table)

The tables list each codepoint along with its Unicode general
category, its shaping class, and its mark-placement subclass. The
codepoint's Unicode name and an example glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0D82`   | Mark [Mc]        | BINDU             | RIGHT_POSITION             | &#x0D82; Anusvara            |
| | | | |
|`U+0D9A`   | Letter           | CONSONANT         | _null_                     | &#x0D9A; Ka                  |
:::


Codepoints with no assigned meaning are designated as _unassigned_ in
the _Unicode category_ column.

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the tables use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of Sinhala text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (such as <samp>"Reph"</samp>)


In other Indic scripts, the zero-width joiner (<abbr>ZWJ</abbr>) is used to prevent
the formation of conjuncts and to suppress the formation of <samp>"Reph"</samp>.

Sinhala, however, differs considerably in its use of <samp>"ZWJ"</samp>.

  - In `<sinh>` text, <samp>"Reph"</samp> is only formed by the use of an explicit
    <samp>"Ra,Halant,ZWJ"</samp> sequence.
  - In `<sinh>` text, the sequence
    <samp>"Consonant_1,Halant,ZWJ,Consonant_2"</samp> is used to specify the
    subjoined form of <samp>"Consonant_2"</samp>.
 
:::{figure-md}
![Reph formation](/images/sinhala/sinhala-rphf.svg "Reph formation"){.shaping-demo .inline-svg .greyscale-svg #sinhala-rphf}

Reph formation
:::

```{svg-color-toggle-button} sinhala-rphf
```


The zero-width non-joiner (<abbr>ZWNJ</abbr>) is not used in shaping runs of
Sinhala text. The <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> is referenced below in various regular
expressions and shaping rules, however, because it is used by other
Indic scripts.

The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.

For example:

  - A lookup that substitutes an alternate version of a
    dependent-vowel (matra) glyph when it is preceded by <samp>"Ka,Halant,Tta"</samp>
    should still be applied if the dependent-vowel codepoint is preceded
    by <samp>"Ka,Halant,ZWJ,Tta"</samp> in the text run.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match <samp>"NBSP,ZWJ,Halant,_Consonant_"</samp>, <samp>"NBSP,_mark_"</samp>, or <samp>"NBSP,_matra_"</samp>.


## The `<sinh>` shaping model ##

Processing a run of `<sinh>` text involves six top-level stages:

1. Identifying syllables and other sequences
2. Initial reordering
3. Applying the basic substitution features from <abbr>GSUB</abbr>
4. Final reordering
5. Applying all remaining substitution features from <abbr>GSUB</abbr>
6. Applying all remaining positioning features from <abbr>GPOS</abbr>


As with other Indic scripts, the initial reordering stage and the
final reordering stage each involve applying a set of several
script-specific rules. The basic substitution features must be applied
to the run in a specific order. The remaining substitution features in
stage five, however, do not have a mandatory order.

Indic scripts follow many of the same shaping patterns, but they
differ in a few critical characteristics that the shaping engine must
track. These include:

  - The position of the base consonant in a syllable.
  
  - The final position of <samp>"Reph"</samp>.
  
  - Whether <samp>"Reph"</samp> must be requested explicitly or if it is formed by
    a specific, implicit sequence.
	
  - Whether the below-base forms feature is applied only to consonants
    before the syllable base, only to consonants after the base
    consonant, or to both.
	
  - The ordering positions for dependent vowels
    (matras). Specifically, right-side, above-base, and below-base
    matras follow different rules in different scripts. 
	All Indic scripts position left-side matras in the same
    manner, in the ordering position `POS_PREBASE_MATRA`. 

With regard to these common variations, Sinhala's specific shaping
characteristics include: 

  - `BASE_POS_LAST_SINHALA` = The base consonant of a syllable is the last
     consonant, not counting any special final-consonant
     forms. However, the algorithm used for locating the base
     consonant in `<sinh>` text differs from that used by other
     `BASE_POS_LAST` scripts.

  - `REPH_POS_AFTER_POST` = <samp>"Reph"</samp> is ordered after the last post-base
     consonant form.

  - `REPH_MODE_EXPLICIT` = <samp>"Reph"</samp> is formed by an initial <samp>"Ra,Halant,ZWJ"</samp> sequence.

  - `BLWF_MODE_PRE_AND_POST` = The below-base forms feature is applied both to
     pre-base consonants and to post-base consonants.

  - `MATRA_POS_TOP` = `POS_AFTER_SUBJOINED` = Above-base matras are
     ordered after subjoined (i.e., below-base) consonant forms. 

  - `MATRA_POS_RIGHT` = `POS_AFTER_SUBJOINED` = Right-side matras are
     ordered after subjoined (i.e., below-base) consonant forms. 

  - `MATRA_POS_BOTTOM` = `POS_AFTER_SUBJOINED` = Below-base matras are
     ordered after all subjoined (i.e., below-base) consonant forms.

These characteristics determine how the shaping engine must reorder
certain glyphs, how base consonants are determined, and how <samp>"Reph"</samp>
should be encoded within a run of text.

### Stage 1: Identifying syllables and other sequences ###

A syllable in Sinhala consists of a valid orthographic sequence
that may be followed by a "tail" of modifier signs. 

> Note: The Sinhala Unicode block enumerates two modifier signs,
> "Anusvara" (`U+0D82`) and "Visarga" (`U+0D83`). In addition,
> Sanskrit text written in Sinhala may include additional signs from
> the Vedic Extensions block.

Each syllable contains exactly one vowel sound. Valid syllables may
begin with either a consonant or an independent vowel. 

If the syllable begins with a consonant, then the consonant that
provides the vowel sound is referred to as the "base" consonant. If
the syllable begins with an independent vowel, that independent vowel
is the syllable's only vowel sound and serves as the "base". 

> Note: A consonant that is not accompanied by a dependent vowel (matra) sign
> carries the script's inherent vowel sound. This vowel sound is changed
> by a dependent vowel (matra) sign following the consonant.

From the shaping engine's perspective, the main distinction between a
syllable with a base consonant and a syllable with an
independent-vowel base is that a syllable with an independent-vowel
base is less likely to include additional consonants in special forms
and less likely to include dependent vowel signs
(matras). Therefore, in the common case, vowel-based syllables may
involve less reordering, substitution feature applications, and other
processing than consonant-based syllables.

In some languages and orthographies, vowel-based syllables are
not permitted to include additional consonants or matras, and certain
<abbr title="Glyph Substitution table">GSUB</abbr> substitution features do not occur. However, there are often
known exceptions, and real-world text makes no such guarantees. 

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

Generally speaking, the base consonant is the final consonant of the
syllable that does not take on a subjoined form, and its vowel sound
designates the end of the syllable. This rule is synonymous with the
`BASE_POS_LAST_SINHALA` characteristic mentioned earlier. 

Valid consonant-based syllables may include one or more additional 
consonants that precede the base consonant. Each of these
other, pre-base consonants will be followed by the <samp>"Halant"</samp> mark, which
indicates that they carry no vowel. They affect pronunciation by
combining with the base consonant (e.g., "_str_", "_pl_") but they
do not add a vowel sound.

As with other Indic scripts, the consonant <samp>"Ra"</samp> receives special
treatment; in many circumstances it is replaced by a combining
mark-like form. 

  - A <samp>"Ra,Halant,ZWJ"</samp> sequence at the beginning of a syllable
    is replaced with an above-base mark called <samp>"Reph"</samp>. 
    This rule is synonymous with the `REPH_MODE_EXPLICIT`
    characteristic mentioned earlier.

In addition, the subjoined form of a post-base-consonant <samp>"Ra"</samp> can be
explicitly requested with a <samp>"Halant,ZWJ,Ra"</samp> sequence. This form is called
<samp>"Rakaaraansaya"</samp>.

<samp>"Reph"</samp> characters must be reordered after the syllable-identification
stage is complete. <samp>"Rakaaraansaya"</samp> is not reordered.


In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.

> Note: Foreign loanwords, when written in the Sinhala script, may
> not adhere to the syllable-formation rules described above. In
> particular, it is not uncommon to encounter foreign loanwords that
> contain a word-final suffix of consonants.
>
> Nevertheless, such word-final suffixes will be correctly matched by
> the regular expressions listed below. These loanwords are pronounced
> differently, which raises issues for potential readers, but the
> character sequences do not affect the shaping process.


Syllables should be identified by examining the run and matching
glyphs, based on their categorization, using regular expressions. 

The following general-purpose Indic-shaping regular expressions can be
used to match Sinhala syllables. 

The regular expressions utilize the shaping classes from the tables
above. For the purpose of syllable identification, more general
classes can be used, as defined in the following table. This
simplifies the resulting expressions. 

```markdown
_ra_		= The consonant "Ra" 
_consonant_	= ( `CONSONANT` | `CONSONANT_DEAD` ) - _ra_
_vowel_		= `VOWEL_INDEPENDENT`
_nukta_	  	= `NUKTA`
_halant_	= `VIRAMA`
_zwj_		= `JOINER`
_zwnj_		= `NON_JOINER`
_matra_		= `VOWEL_DEPENDENT` | `PURE_KILLER`
_syllablemodifier_	= `SYLLABLE_MODIFIER` | `BINDU` | `VISARGA` | `GEMINATION_MARK`
_vedicsign_	= `CANTILLATION`
_placeholder_	= `PLACEHOLDER` | `CONSONANT_PLACEHOLDER` | `NUMBER` 
_dottedcircle_	= `DOTTED_CIRCLE`
_repha_		= `CONSONANT_PRE_REPHA`
_consonantmedial_	= `CONSONANT_MEDIAL`
_symbol_	= `SYMBOL` | `AVAGRAHA`
_consonantwithstacker_	= `CONSONANT_WITH_STACKER`
_other_		= `OTHER`| `MODIFYING_LETTER`
```


> Note: the _ra_ identification class is mutually exclusive with 
> the _consonant_ class. The union of the _consonant_ and _ra_ classes
> is used in the regular expression elements below in order to
> correctly identify <samp>"Ra"</samp> characters that do not trigger <samp>"Reph"</samp> or
> <samp>"Rakaaraansaya"</samp> shaping behavior.
>
> Note, also, that the cantillation mark "combining Ra" in the
> Devanagari Extended block does _not_ belong to the _ra_
> identification class, and that the other "combining consonant"
> cantillation marks in the Devanagari Extended block do not belong to
> the _consonant_ identification class.

> Note: The _placeholder_ identification class includes codepoints
> that are often used in place of vowels or consonants when a document
> needs to display a matra, mark, or special form in isolation or
> in another context beyond a standard syllable. Examples of
> _placeholder_ codepoints include hyphens and non-breaking
> spaces. Sequences that utilize this approach should be identified as
> "standalone" syllables.
>
> The _placeholder_ identification class also includes numerals, which
> are commonly used as word substitutes within normal text. Examples
> include ordinals (e.g., "4th").

> Note: The _other_ identification class includes codepoints that
> do not interact with adjacent characters for shaping purposes. Even
> though some of these codepoints (such as `MODIFYING_LETTER`) can
> occur within words, they evoke no behavior from the shaping
> engine and do not factor into the regular expressions that
> follow. Therefore, the shaping engine may choose to ignore them
> during syllable identification; they are listed here for completeness.

These identification classes form the bases of the following regular
expression elements:

```markdown
C	= _consonant_ | _ra_
Z	= _zwj_ | _zwnj_
REPH	= (_ra_ _halant_) | _repha_
CN		= C _zwj_? _nukta_?
FORCED_RAKAR	= _zwj_ _halant_ _zwj_ _ra_
S	= _symbol_ _nukta_?
MATRA_GROUP	= Z{0,3} _matra_ _nukta_? (_halant_ | FORCED_RAKAR)?
SYLLABLE_TAIL	= (Z? _syllablemodifier_ _syllablemodifier_? _zwnj_?)? _vedicsign_{0,3}
HALANT_GROUP	= Z? _halant_ (_zwj_ _nukta_?)?
FINAL_HALANT_GROUP	= HALANT_GROUP | (_halant_ _zwnj_)
MEDIAL_GROUP	= _consonantmedial_?
HALANT_OR_MATRA_GROUP	= (FINAL_HALANT_GROUP | MATRA_GROUP*)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(MATRA_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(MATRA_GROUP){0,4}` .


Using the above elements, the following regular expressions define the
possible syllable types:

A consonant-based syllable will match the expression:
```markdown
(_repha_|_consonantwithstacker_)? (CN HALANT_GROUP)* CN MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(CN HALANT_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(CN HALANT_GROUP){0,4}` .

A vowel-based syllable will match the expression:
```markdown
REPH? _vowel_ _nukta_? (_zwj_ | (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

A standalone syllable will match the expression:
```markdown
((_repha_|_consonantwithstacker_)? _placeholder_ | REPH? _dottedcircle_) _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

> Note: Although they are labeled as "standalone syllables" here,
> many sequences that match the standalone regular expression above
> are instances where a document needs to display a matra, combining
> mark, or special form in isolation. Such sequences might not have
> any significance with regard to the definition of syllables used in
> the language or orthography of the text.

A symbol-based syllable will match the expression:
```markdown
S SYLLABLE_TAIL
```

A broken syllable will match the expression:
```markdown
REPH? _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .


The primary problem involved in shaping broken syllables is the lack
of a syllable base (either a base consonant or an independent
vowel). Without a syllable base, the shaping engine cannot perform
<abbr title="Glyph Positioning table">GPOS</abbr> positioning and other contextual operations that are required
later in the shaping process.

To make up for this limitation, shaping engines should insert a
dotted-circle placeholder (`U+25CC`) character into the text stream
where the missing syllable base was expected to occur. This
placeholder allows the shaping process to proceed on a best-effort
basis at handling the broken-syllable sequence, but making guarantees
about the orthographic correctness or preferred appearance of the
final result is out of scope for this document.

Shaping engines can perform this dotted-circle insertion at any point
after the broken syllable has been recognized and before <abbr title="Glyph Substitution table">GSUB</abbr> features
are applied. However, the best results will likely be attained by
performing the insertion immediately, before proceeding to
stage 2. This will enable the maximum number of <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features
in the active font to be correctly applied to the text run by ensuring
that all reordering, tagging, and sorting algorithms are executed as
usual.

> Note: In software stacks where other text-handling operations, such
> as Unicode normalization and localization, are performed before the
> text run is passed to the shaping engine, there is a potential for
> the dotted-circle insertion to cause unexpected effects.
>
> For example, if a `ccmp` or `locl` feature substitutes the default
> dotted-circle placeholder glyph with a variant glyph of a different
> size or weight for the (`U+25CC`) codepoint, then any shaping engine
> which relies on another software component to handle that
> functionality must take additional care to ensure consistency.


The expressions above use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


After the syllables have been identified, each of the subsequent 
shaping stages occurs on a per-syllable basis.

### Stage 2: Initial reordering ###

The initial reordering stage is used to relocate glyphs from the
phonetic order in which they occur in a run of text to the
orthographic order in which they are presented visually.

> Note: Primarily, this means moving dependent-vowel (matra) glyphs, 
> <samp>"Ra,Halant,ZWJ"</samp> glyph sequences, and other consonants that take special
> treatment in some circumstances. <samp>"Ya"</samp> may take on special forms,
> depending on its position in the syllable. 
>
> These reordering moves are mandatory. The final-reordering stage
> may make additional moves, depending on the text and on the features
> implemented in the active font.

The syllable should be processed by tagging each glyph with its
intended position based on its ordering category. After all glyphs
have been tagged, the entire syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.

The final sort order of the ordering categories should be:


	POS_RA_TO_BECOME_REPH
	POS_PREBASE_MATRA
	POS_PREBASE_CONSONANT

	POS_SYLLABLE_BASE
	POS_AFTER_MAIN

	POS_ABOVEBASE_CONSONANT

	POS_BEFORE_SUBJOINED
	POS_BELOWBASE_CONSONANT
	POS_AFTER_SUBJOINED

	POS_BEFORE_POST
	POS_POSTBASE_CONSONANT
	POS_AFTER_POST

	POS_FINAL_CONSONANT
	POS_SMVD


This sort order enumerates all of the possible final positions to
which a codepoint might be reordered, across all of the Indic
scripts. It includes some ordering categories not utilized in
Sinhala. 

The basic positions (left to right) are <samp>"Reph"</samp>
(`POS_RA_TO_BECOME_REPH`), dependent vowels (matras) and consonants
positioned before the base consonant or syllable base
(`POS_PREBASE_MATRA` and `POS_PREBASE_CONSONANT`), the base consonant
or syllable base (`POS_SYLLABLE_BASE`), above-base consonants
(`POS_ABOVEBASE_CONSONANT`), below-base consonants
(`POS_BELOWBASE_CONSONANT`), consonants positioned after the base
consonant or syllable base (`POS_POSTBASE_CONSONANT`), syllable-final
consonants (`POS_FINAL_CONSONANT`), and syllable-modifying or Vedic
signs (`POS_SMVD`).

In addition, several secondary positions are defined to handle various
reordering rules that deal with relative, rather than absolute,
positioning. `POS_AFTER_MAIN` means that a character must be
positioned immediately after the syllable base. `POS_BEFORE_SUBJOINED`
and `POS_AFTER_SUBJOINED` mean that a character must be positioned
before or after any below-base consonants, respectively. Similarly,
`POS_BEFORE_POST` and `POS_AFTER_POST` mean that a character must be
positioned before or after any post-base consonants, respectively. 

For shaping-engine implementers, the names used for the ordering
categories matter only in that they are unambiguous. 

For a definition of the "base" consonant, refer to stage 2, step 1, which
follows.

#### Stage 2, step 1: Base consonant ####

The first step is to determine the base consonant of the syllable, if
there is one, and tag it as `POS_SYLLABLE_BASE`.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base, and it should be tagged
as `POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a standalone sequence or other syllable that begins with a placeholder
or dotted circle, the placeholder or dotted circle will always serve
as the syllable base, and it should be tagged as
`POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a syllable that begins with a consonant, the shaping engine must
determine the base consonant by a script-specific algorithm.

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

The base consonant is defined as the consonant in a consonant-based
syllable that carries the syllable's vowel sound. That vowel sound
will either be provided by the script's inherent vowel (in which case
it is not written with a separate character) or the sound will be designated
by the addition of a dependent-vowel (matra) sign.

Due to the different usage of <abbr title="Zero-Width Joiner">ZWJ</abbr> characters in `<sinh>` text runs, a
different algorithm is required for the shaper to identify the base
consonant of a syllable. The algorithm for determining the base
consonant in Sinhala is:

  - If the syllable starts with <samp>"Ra,Halant,ZWJ"</samp>, exclude the starting
    <samp>"Ra"</samp> from the list of consonants to be considered. 
  - Starting from the end of the syllable, move backwards until a consonant is found.
      * If the consonant is immediately preceded by a <abbr title="Zero-Width Joiner">ZWJ</abbr>, move to the
        previous consonant. If the consonant is not immediately
        preceded by a <abbr title="Zero-Width Joiner">ZWJ</abbr>, stop.
      * If the consonant is the first consonant, stop.
  - The consonant stopped at will be the base consonant.


> Note: Unlike with many other Indic scripts, it is not necessary for
> the shaping engine to independently determine if any consonant has a
> post-base or below-base form in the active font. The use of a <abbr title="Zero-Width Joiner">ZWJ</abbr>
> character before a consonant in the search explicitly designates
> such a special form.


#### Stage 2, step 2: Matra decomposition ####

Second, any multi-part dependent vowels (matras) must be decomposed
into their individual components. 

Sinhala has four multi-part dependent vowels, "Ee" (`U+0DDA`), "O"
(`U+0DDC`), "Oo" (`U+0DDD`), and "Au" (`U+0DDE`). Each
has a canonical decomposition, so this step is unambiguous. 

> "Ee" (`U+0DDA`) decomposes to "`U+0DD9`,`U+0DCA`"
>
> "O" (`U+0DDC`)  decomposes to "`U+0DD9`,`U+0DCF`"
>
> "Oo" (`U+0DDD`) decomposes to "`U+0DD9`,`U+0DCF`,`U+0DCA`"
>
> "Au" (`U+0DDE`) decomposes to "`U+0DD9`,`U+0DDF`"

Because this decomposition is a character-level operation, the shaping
engine may choose to perform it earlier, such as during an initial
Unicode-normalization stage. However, all such decompositions must be
completed before the shaping engine begins step three, below.

> Note: The decomposition of "Oo" (`U+0DDD`) is atypical; Unicode
> specifies that the codepoint decomposes to "O" (`U+0DDC`) followed
> by `U+0DCA`; the "O" codepoint is then decomposed to
> "`U+0DD9`,`U+0DCF`". Shaping engines must take care not to miss this
> second decomposition.

> Note: For Sinhala, the `pstf` substitution feature of <abbr title="Glyph Substitution table">GSUB</abbr> is
> defined as replacing the entire multi-part matra with its right-side
> component. 
>
> The Microsoft Uniscribe shaping engine historically
> supported this behavior -- in a sense, decomposing each matra into
> its left-side component followed by a duplicate of the original
> matra, then substituting the duplicated matra with the right-side
> matra component in [stage 3, step 10](#stage-3-step-10-pstf), when the `pstf`
> feature is applied. 
>
> Fonts that were engineered to support this behavior might not
> include <abbr title="Glyph Positioning table">GPOS</abbr> positioning rules for the right-side matra components,
> relying instead on the `pstf` substitution to provide a suitable
> replacement. Shaping engines should do their best to deal gracefully
> with fonts that were developed only with this behavior in mind.

:::{figure-md}
![Multi-part matra decomposition](/images/sinhala/sinhala-matra-decompose.svg "Multi-part matra decomposition"){.shaping-demo .inline-svg .greyscale-svg #sinhala-matra-decompose}

Multi-part matra decomposition
:::

```{svg-color-toggle-button} sinhala-matra-decompose
```


#### Stage 2, step 3: Tag matras ####

Third, all left-side dependent-vowel (matra) signs must be tagged to be
moved to the beginning of the syllable, with `POS_PREBASE_MATRA`.

Above-base, right-side, and below-base dependent-vowel (matra) signs
must be tagged with `POS_AFTER_SUBJOINED`.

#### Stage 2, step 4: Adjacent marks ####

Fourth, any subsequences of marks that include a <samp>"Nukta"</samp> and a
<samp>"Halant"</samp> or Vedic sign must be reordered so that the <samp>"Nukta"</samp> appears
first.

This means that the subsequence <samp>"Halant,Nukta"</samp> is reordered to
<samp>"Nukta,Halant"</samp> and that the subsequence <samp>"_Vedic_sign_,Nukta"</samp> is
reordered to <samp>"Nukta,_Vedic_sign_"</samp>.

For subsequences of affected marks that are longer than two, the
reordering operation must be repeated until the <samp>"Nukta"</samp> is the first
character in the subsequence. No other marks in the subsequence
should be reordered.

This order is canonical in Unicode and is required so that
<samp>"_consonant_,Nukta"</samp> substitution rules from <abbr title="Glyph Substitution table">GSUB</abbr> will be correctly
matched later in the shaping process.

> Note: Nukta usage in Sinhala is rare.

#### Stage 2, step 5: Pre-base consonants ####

Fifth, consonants that occur before the base consonant or syllable base must be tagged
with `POS_PREBASE_CONSONANT`.

#### Stage 2, step 6: Reph ####

Sixth, initial <samp>"Ra,Halant,ZWJ"</samp> sequences that will become <samp>"Reph"</samp>s must be tagged with
`POS_RA_TO_BECOME_REPH`.

> Note: an initial <samp>"Ra,Halant,ZWJ"</samp> sequence will always become a <samp>"Reph"</samp>.

#### Stage 2, step 7: Post-base consonants ####

Seventh, any non-base consonants that occur after a dependent vowel
(matra) sign must be tagged with `POS_POSTBASE_CONSONANT`. 

In Sinhala, the only consonants that can appear in this position are
<samp>"Ra"</samp> and <samp>"Ya"</samp>. A <samp>"Halant,ZWJ,Ya"</samp> sequence after the base consonant or syllable base will take on
the <samp>"Yansaya"</samp> form when the `vatu` feature is applied. A
<samp>"Halant,ZWJ,Ra"</samp> sequence after the base consonant or syllable base will take on 
the <samp>"Rakaaraansaya"</samp> form when the `vatu` feature is applied.

:::{figure-md}
![Yansaya ligation](/images/sinhala/sinhala-vatu-va.svg "Yansaya ligation"){.shaping-demo .inline-svg .greyscale-svg #sinhala-vatu-va}

Yansaya ligation
:::

```{svg-color-toggle-button} sinhala-vatu-va
```

:::{figure-md}
![Rakaaraansaya ligation](/images/sinhala/sinhala-vatu-ra.svg "Rakaaraansaya ligation"){.shaping-demo .inline-svg .greyscale-svg #sinhala-vatu-ra}

Rakaaraansaya ligation
:::

```{svg-color-toggle-button} sinhala-vatu-ra
```


#### Stage 2, step 8: Mark tagging ####

Eighth, all marks must be tagged. 

> Note: In this step, joiner and non-joiner characters must also be
> tagged according to the same rules given for marks, even though
> these characters are not categorized as marks in Unicode.

Marks in the `BINDU`, `VISARGA`, `AVAGRAHA`, `CANTILLATION`,
`SYLLABLE_MODIFIER`, `GEMINATION_MARK`, and `SYMBOL` categories should
be tagged with `POS_SMVD`. 

All <samp>"Nukta"</samp>s must be tagged with the same positioning tag as the
preceding consonant, independent vowel, placeholder, or dotted circle.

All remaining marks (not in the `POS_SMVD` category and not <samp>"Nukta"</samp>s)
must be tagged with the same positioning tag as the closest non-mark
character the mark has affinity with, so that they move together 
during the sorting step.

There are two possible cases: those marks before the syllable base
and those marks after the syllable base. In addition, an exception is
made for <samp>"Halant"</samp> marks that follow a left-side (pre-base) matra.

  1. Initially, all remaining marks should be tagged with the same
	 positioning tag as the closest preceding consonant.

  2. For each consonant after the syllable base (such as post-base
	 consonants, below-base consonants, or final consonants), all
	 remaining marks located between that current consonant and any
	 previous consonant should be tagged with the same positioning tag as
	 the current (later) consonant.
  
     In other words, all consonants preceding the syllable base "own" the
	 marks that follow them, while all consonants after the syllable base
	 "own" the marks that come before them. When a syllable does not have
	 any consonants after the syllable base, the syllable base should
	 "own" all the marks that follow it.
  
  3. Finally, <samp>"Halant"</samp> marks that follow a left-side dependent vowel
     (matra) should _not_ be tagged with the left-side matra's
     positioning tag. Instead, the <samp>"Halant"</samp> should be tagged with the
     positioning tag of the non-mark character preceding the left-side
     matra. This prevents the <samp>"Halant"</samp> mark from being moved with the
     left-side matra when the syllable is sorted.


<!--- HarfBuzz also tags everything between a post-base consonant or -->
<!--matra and another post-base consonant as belonging to the latter -->
<!--post-base consonant. --->


#### Stage 2, step 9: Sort syllable ####

With these steps completed, the syllable can be sorted into the final
sort order as listed at the beginning of stage 2.

The glyphs in the syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.


#### Stage 2, step 10: Flag sequences for possible feature applications ####

With the initial reordering complete, those glyphs in the syllable that
may have <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features applied in stages 3, 5, and 6 should be
flagged for each potential feature. 

This flagging is preliminary; the set of potential features varies
between different scripts and which features are supported varies
between fonts. It is also possible that the application of
one feature on a glyph sequence will perform a substitution that makes
a later feature no longer applicable to the updated sequence.

Consequently, the flagging must be completed before shaping proceeds
to the stages during which features are applied.

Some shaping features, such as `locl`, can potentially apply to any
glyphs. Therefore it is not necessary to maintain a separate flag for
these features in the bitmask (or other data structure) used to track
the flags -- although shaping engines may do so if desired.

The sequences to flag are summarized in the list below; a full
description of each feature's function and interpretation is provided
in the <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> application stages that follow.

  - `akhn` should match <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> and
           <samp>"_Consonant_,ZWJ,Halant,_Consonant_"</samp> sequences
  - `rphf` should match initial <samp>"Ra,Halant,ZWJ"</samp> sequences
  - `pstf` should match <samp>"_Matra_"</samp> in post-base position
  - `vatu` should match <samp>"Halant,ZWJ,Ra"</samp> and <samp>"Halant,ZWJ,Ya"</samp>


### Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should already have been flagged for possible
application of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2, step 10.

The order in which these substitutions must be performed is fixed for
all Indic scripts:

	locl
	nukt (not used in Sinhala)
	akhn
	rphf 
	rkrf (not used in Sinhala)
	pref (not used in Sinhala)
	blwf (not used in Sinhala)
	abvf (not used in Sinhala)
	half (not used in Sinhala)
	pstf
	vatu
	cjct (not used in Sinhala)
	cfar (not used in Sinhala)

#### Stage 3, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

#### Stage 3, step 2: nukt ####

> This feature is not used in Sinhala.


#### Stage 3, step 3: akhn ####

In Sinhala, the `akhn` feature provides two substitution types.

  - <samp>"Consonant,Halant,ZWJ,Consonant"</samp> sequences are used to specify a ligature. 
  - <samp>"Consonant,ZWJ,Halant,Consonant"</samp> sequences are used to specify
    "touching consonant" substitutions used in Pali and Sanskrit. 
  

:::{figure-md}
![Ligature substitution](/images/sinhala/sinhala-akhn-ligature.svg "Ligature substitution"){.shaping-demo .inline-svg .greyscale-svg #sinhala-akhn-ligature}

Ligature substitution
:::

```{svg-color-toggle-button} sinhala-akhn-ligature
```

:::{figure-md}
![Touching consonant substitution](/images/sinhala/sinhala-akhn-touching.svg "Touching consonant substitution"){.shaping-demo .inline-svg .greyscale-svg #sinhala-akhn-touching}

Touching consonant substitution
:::

```{svg-color-toggle-button} sinhala-akhn-touching
```


#### Stage 3, step 4: rphf ####

The `rphf` feature replaces initial <samp>"Ra,Halant,ZWJ"</samp> sequences with the
<samp>"Reph"</samp> glyph.
	

:::{figure-md}
![Reph composition](/images/sinhala/sinhala-rphf-1.svg "Reph composition"){.shaping-demo .inline-svg .greyscale-svg #sinhala-rphf-1}

Reph composition
:::

```{svg-color-toggle-button} sinhala-rphf-1
```
	
#### Stage 3, step 5: rkrf ####

> This feature is not used in Sinhala.


#### Stage 3, step 6: pref ####

> This feature is not used in Sinhala.


#### Stage 3, step 7: blwf ####

> This feature is not used in Sinhala.


#### Stage 3, step 8: abvf ####

> This feature is not used in Sinhala.


#### Stage 3, step 9: half ####

> This feature is not used in Sinhala.


#### Stage 3, step 10: pstf ####

In Sinhala, the `pstf` feature replaces multi-part dependent vowels
(matras) with the right-side matra component of the canonical
decomposition.

> Note: This substitution is possible because all multi-part dependent
> vowels in Sinhala use the same left-side matra component, `U+0DD9`.
>
> The Microsoft Uniscribe shaping engine historically
> supported this behavior by handling the decomposition of multi-part
> dependent vowels in [stage 2, step 2](#stage-2-step-2-matra-decomposition)
> differently for Sinhala -- in a sense, decomposing each matra into
> its left-side component followed by a duplicate of the original
> matra, then substituting the duplicated matra with the right-side
> matra component when the `pstf` feature is applied. 
> 
> Shaping engines may, optionally, decompose multi-part dependent
> vowels in [stage 2, step 2](#stage-2-step-2-matra-decomposition) into their
> canonical Unicode decompositions, as is done in other scripts, and
> substitute the decomposed right-side matra components at that point.
> 
> Doing so will negate the need to apply the `pstf` substitution.
> However, fonts that were engineered to support the
> Uniscribe-supported behavior might not include <abbr title="Glyph Positioning table">GPOS</abbr> positioning
> rules for the right-side matra components, relying instead on the
> `pstf` substitution to provide a suitable replacement. Shaping
> engines should do their best to deal gracefully with fonts that were
> developed only with this behavior in mind.

:::{figure-md}
![Post-base form substitution](/images/sinhala/sinhala-pstf.svg "Post-base form substitution"){.shaping-demo .inline-svg .greyscale-svg #sinhala-pstf}

Post-base form substitution
:::

```{svg-color-toggle-button} sinhala-pstf
```


#### Stage 3, step 11: vatu ####

In Sinhala, the `vatu` feature replaces certain sequences with
ligatures using the subjoined forms of <samp>"Ra"</samp> or <samp>"Ya"</samp>.

  - The sequence <samp>"Consonant,Halant,ZWJ,Ra"</samp> triggers the
    <samp>"Rakaaraansaya"</samp> form of the consonant.
  - The sequence <samp>"Consonant,Halant,ZWJ,Ya"</samp> triggers the <samp>"Yansaya"</samp> form
    of the consonant.
  

:::{figure-md}
![Rakaaraansaya ligation](/images/sinhala/sinhala-vatu-ra-1.svg "Rakaaraansaya ligation"){.shaping-demo .inline-svg .greyscale-svg #sinhala-vatu-ra-1}

Rakaaraansaya ligation
:::

```{svg-color-toggle-button} sinhala-vatu-ra-1
```

:::{figure-md}
![Yansaya ligation](/images/sinhala/sinhala-vatu-va-1.svg "Yansaya ligation"){.shaping-demo .inline-svg .greyscale-svg #sinhala-vatu-va-1}

Yansaya ligation
:::

```{svg-color-toggle-button} sinhala-vatu-va-1
```


#### Stage 3, step 12: cjct ####

> This feature is not used in Sinhala.


#### Stage 3, step 13: cfar ####

> This feature is not used in Sinhala.


### Stage 4: Final reordering ###

The final reordering stage repositions marks, dependent-vowel (matra)
signs, and <samp>"Reph"</samp> glyphs to the appropriate location with respect to
the base consonant or syllable base. Because multiple substitutions
may have occurred during the application of the basic-shaping features
in the preceding stage, these repositioning moves could not be
performed during the initial reordering stage.

Like the initial reordering stage, the steps involved in this stage
occur on a per-syllable basis.


#### Stage 4, step 1: Base consonant ####

The final reordering stage, like the initial reordering stage, begins
with determining the syllable base of each syllable, following the
same algorithm used in stage 2, step 1.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base. In a standalone sequence or
other syllable that begins with a placeholder or a dotted circle, the
placeholder or dotted circle will always serve as the syllable base.

In a syllable that begins with a consonant, the shaping engine must
repeat the base-consonant search algorithm used in stage 2, step 1.

The codepoint of the underlying base consonant or syllable base will
not change between the search performed in stage 2, step 1, and the
search repeated here. However, the application of <abbr title="Glyph Substitution table">GSUB</abbr> shaping
features in stage 3 means that several ligation and many-to-one
substitutions may have taken place. The final glyph produced by that
process may, therefore, be a conjunct or ligature form — in most
cases, such a glyph will not have an assigned Unicode codepoint.
   
#### Stage 4, step 2: Pre-base matras ####

Pre-base dependent vowels (matras) that were reordered during the
initial reordering stage must be moved to their final position. This
position is defined as:
   
   - after the last standalone <samp>"Halant"</samp> glyph that comes after the
     matra's starting position and also comes before the main
     consonant.
   - If a zero-width joiner follows this last standalone <samp>"Halant"</samp>, the
     final matra position is moved to after the joiner.

This means that the matra will move to the right of all explicit
<samp>"consonant,Halant"</samp> subsequences, but will stop to the left of the base
consonant or syllable base, all conjuncts or ligatures that contain
the base consonant or syllable base, and all half forms.

:::{figure-md}
![Pre-base matra positioning](/images/sinhala/sinhala-matra-position.svg "Pre-base matra positioning"){.shaping-demo .inline-svg .greyscale-svg #sinhala-matra-position}

Pre-base matra positioning
:::

```{svg-color-toggle-button} sinhala-matra-position
```


> Note: OpenType and Unicode both state that if the syllable includes
> a <abbr title="Zero-Width Joiner">ZWJ</abbr> immediately after the last <samp>"Halant"</samp>, then the final matra
> position should be after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.
>
> However, there are several test sequences indicating that
> Microsoft's Uniscribe shaping engine did not follow this rule (in,
> at least, Devanagari and Bengali text), and in these circumstances
> Uniscribe instead makes the final matra position before the final
> <samp>"Consonant,Halant,ZWJ"</samp>.
>
> Subsequently, the HarfBuzz shaping engine has also followed the same
> pattern. If other shaping engine implementations prefer to maintain
> maximum compatibility with Uniscribe and HarfBuzz, then they should
> also follow suit.

> Note: The Microsoft script-development specifications for OpenType
> shaping also state that if a zero-width non-joiner follows the last
> standalone <samp>"Halant"</samp>, the final matra position is moved to after the
> non-joiner. However, it is unnecessary to test for this condition,
> because a <samp>"Halant,ZWNJ"</samp> subsequence is, by definition, the end of a
> syllable. Consequently, a <samp>"Halant,ZWNJ"</samp> cannot be followed by a
> pre-base dependent vowel.


#### Stage 4, step 3: Reph ####

<samp>"Reph"</samp> must be moved from the beginning of the syllable to its final
position. Because Sinhala incorporates the `REPH_POS_AFTER_POST`
shaping characteristic, this final position is defined to be
immediately after any post-base consonant forms.

The algorithm for finding the final <samp>"Reph"</samp> position is:

  - Move the <samp>"Reph"</samp> to the position immediately before
    the first post-base matra, syllable modifier, or Vedic sign that
    has a positioning tag after the script's <samp>"Reph"</samp> position in the
    syllable sort order (as listed in [stage
    2](#stage-2-initial-reordering)). This will be the final <samp>"Reph"</samp>
    position. 
	> Note: Because Sinhala incorporates the
    > `REPH_POS_AFTER_POST` shaping characteristic, this means
    > any positioning tag of `POS_FINAL_CONSONANT` or later,
    > although a post-base matra, syllable modifier, or Vedic sign
    > would not typically be tagged with `POS_FINAL_CONSONANT`.
  - If no other location has been located in the previous step, move
    the <samp>"Reph"</samp> to the end of the syllable.

Finally, if the final position of <samp>"Reph"</samp> or <samp>"Repha"</samp> occurs after a
<samp>"_matra_,Halant"</samp> subsequence, then <samp>"Reph"</samp>/<samp>"Repha"</samp> must be repositioned to the
left of <samp>"Halant"</samp>, to allow for potential matching with `abvs` or
`psts` substitutions from <abbr title="Glyph Substitution table">GSUB</abbr>.


:::{figure-md}
![Reph positioning](/images/sinhala/sinhala-reph-position.svg "Reph positioning"){.shaping-demo .inline-svg .greyscale-svg #sinhala-reph-position}

Reph positioning
:::

```{svg-color-toggle-button} sinhala-reph-position
```


#### Stage 4, step 4: Pre-base-reordering consonants ####

Any pre-base-reordering consonants must be moved to immediately before
the base consonant or syllable base.

Sinhala does not use pre-base-reordering consonants, so this step will
involve no work when processing `<sinh>` text. It is included here in order
to maintain compatibility with the other Indic scripts.
  
  
#### Stage 4, step 5: Initial matras ####

Any left-side dependent vowels (matras) that are at the start of a
word must be flagged for potential substitution by the `init` feature
of <abbr title="Glyph Substitution table">GSUB</abbr>.

Sinhala does not use the `init` feature, so this step will
involve no work when processing `<sinh>` text. It is included here in
order to maintain compatibility with the other Indic scripts.

   
### Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr> ###

In this stage, the remaining substitution features from the <abbr title="Glyph Substitution table">GSUB</abbr> table
are applied. In preparation for this stage, glyph sequences should
already have been flagged for possible application of <abbr title="Glyph Substitution table">GSUB</abbr> features
in stage 2, step 10.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	init (not used in Sinhala)
	pres
	abvs
	blws
	psts
	haln (not used in Sinhala)

The `init` feature is not used in Sinhala.

The `pres` feature replaces pre-base-consonant glyphs with special
presentation forms. This can include ligatures, "touching consonant" forms,
and stylistic variants of left-side dependent vowels (matras). 

:::{figure-md}
![Pre-base substitutions](/images/sinhala/sinhala-pres.svg "Pre-base substitutions"){.shaping-demo .inline-svg .greyscale-svg #sinhala-pres}

Pre-base substitutions
:::

```{svg-color-toggle-button} sinhala-pres
```


The `abvs` feature replaces above-base-consonant glyphs with special
presentation forms. This usually includes contextual variants of
above-base marks or contextually appropriate mark-and-base ligatures.

:::{figure-md}
![Above-base substitutions](/images/sinhala/sinhala-abvs.svg "Above-base substitutions"){.shaping-demo .inline-svg .greyscale-svg #sinhala-abvs}

Above-base substitutions
:::

```{svg-color-toggle-button} sinhala-abvs
```


The `blws` feature replaces below-base-consonant glyphs with special
presentation forms. This usually includes replacing base consonants or
syllable bases
and attached below-base marks with contextual ligatures.

:::{figure-md}
![Below-base substitutions](/images/sinhala/sinhala-blws.svg "Below-base substitutions"){.shaping-demo .inline-svg .greyscale-svg #sinhala-blws}

Below-base substitutions
:::

```{svg-color-toggle-button} sinhala-blws
```

The `psts` feature replaces post-base-consonant glyphs with special
presentation forms. This usually includes replacing right-side
dependent vowels (matras) with stylistic variants or replacing
base-consonant/matra pairs with contextual ligatures. 

:::{figure-md}
![Post-base substitutions](/images/sinhala/sinhala-psts.svg "Post-base substitutions"){.shaping-demo .inline-svg .greyscale-svg #sinhala-psts}

Post-base substitutions
:::

```{svg-color-toggle-button} sinhala-psts
```


The `haln` feature is not used in Sinhala.

> Note: The `calt` feature, which allows for generalized application
> of contextual alternate substitutions, is usually applied at this
> point. However, `calt` is not mandatory for correct Sinhala shaping
> and may be disabled in the application by user preference.


### Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied.

As with the preceding stage, the order in which these features are
applied is not canonical; they should be applied in the order in which
they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

        dist
        abvm
        blwm

> Note: The `kern` feature is usually applied at this stage, if it is
> present in the font. However, `kern` (like `calt`, above) is not
> mandatory for shaping Sinhala text and may be disabled by user preference.

The `dist` feature adjusts the horizontal positioning of
glyphs. Unlike `kern`, adjustments made with `dist` do not require the
application or the user to enable any software _kerning_ features, if
such features are optional. 

:::{figure-md}
![Distance positioning](/images/sinhala/sinhala-dist.svg "Distance positioning"){.shaping-demo .inline-svg .greyscale-svg #sinhala-dist}

Distance positioning
:::

```{svg-color-toggle-button} sinhala-dist
```

The `abvm` feature positions above-base marks for attachment to base
characters. In Sinhala, this includes <samp>"Reph"</samp> in addition to
above-base dependent vowels (matras), diacritical marks, and Vedic signs. 

:::{figure-md}
![Above-base mark positioning](/images/sinhala/sinhala-abvm.svg "Above-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #sinhala-abvm}

Above-base mark positioning
:::

```{svg-color-toggle-button} sinhala-abvm
```

The `blwm` feature positions below-base marks for attachment to base
characters. In Sinhala, this includes below-base dependent vowels
(matras) and diacritical marks.

:::{figure-md}
![Below-base mark positioning](/images/sinhala/sinhala-blwm.svg "Below-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #sinhala-blwm}

Below-base mark positioning
:::

```{svg-color-toggle-button} sinhala-blwm
```


================================================
FILE: opentype-shaping-syriac.md
================================================
```{include} /_global.md
```

# Syriac script shaping in OpenType #

This document details the general shaping procedure shared by all
Syriac script styles, and defines the common pieces that style-specific
implementations share. 


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Joining properties](#joining-properties)
	  - [Mark classification](#mark-classification)
	  - [Character tables](#character-tables)
  - [The `<syrc>` shaping model](#the-syrc-shaping-model)
      - [Stage 1: Transient reordering of modifier combining marks](#stage-1-transient-reordering-of-modifier-combining-marks)
      - [Stage 2: Compound character composition and decomposition](#stage-2-compound-character-composition-and-decomposition)
      - [Stage 3: Computing letter joining states](#stage-3-computing-letter-joining-states)
      - [Stage 4: Applying the `stch` feature](#stage-4-applying-the-stch-feature)
      - [Stage 5: Applying the language-form substitution features from <abbr>GSUB</abbr>](#stage-5-applying-the-language-form-substitution-features-from-gsub)
      - [Stage 6: Applying the typographic-form substitution features from <abbr>GSUB</abbr>](#stage-6-applying-the-typographic-form-substitution-features-from-gsub)
      - [Stage 7: Applying the positioning features from <abbr>GPOS</abbr>](#stage-7-applying-the-positioning-features-from-gpos)
  

## General information ##

The Syriac script is used to write multiple languages, most commonly
Classical Syriac and multiple dialects of Aramaic. In addition,
historical texts use Syriac to write Arabic, Malayalam, Turkish,
Kurdish, and Armenian.

The Syriac script encompasses multiple distinct styles, including
ʾEsṭrangēlā (classical), Maḏnḥāyā (Eastern), and Serṭā (Western), that
share a number of common features and rules, but that differ
considerably in their final appearance. Due to the common features
found between the styles, a shaping engine can support all styles of
Syriac with a single shaping model.

In OpenType, Syriac shaping shares most of the same features that are
defined for [Arabic](opentype-shaping-arabic.md) and related scripts, but with a few
Syriac-specific additions. Therefore, shaping engines are advised to
support Syriac and Arabic using the [same shaping model](opentype-shaping-arabic-general.md).

Syriac is a joining script that uses inter-word spaces, so each
codepoint in a text run may be substituted with one of several
contextual forms corresponding to what, if any, characters appear
before and after the codepoint. Most, but not all, letter sequences
join; shaping engines must track which positions trigger joining
behavior for each letter. 

:::{figure-md}
![Isolated, initial, medial, and final contextual forms of a letter](/images/syriac/syriac-joining.svg "Isolated, initial, medial, and final contextual forms of a letter"){.shaping-demo .inline-svg .greyscale-svg #syriac-joining}

Isolated, initial, medial, and final contextual forms of a letter
:::

```{svg-color-toggle-button} syriac-joining
```

Syriac is written (and, therefore, rendered) from right to
left. Shaping engines must track the directionality of the text run
when scripts of different direction are mixed.

## Terminology ##

OpenType shaping uses a standard set of terms for elements of the
Syriac script. The terms used colloquially in any particular language
may vary, however, potentially causing confusion.

**Base** glyph or character is the standard term for a Syriac
character that is capable of taking a diacritical mark. 

All of the base characters in Syriac are consonants by definition, but
several of these consonants are also used to represent vowels as base
characters in certain circumstances.

Vowels that are not base characters are frequently omitted from the
text run entirely. Alternatively, such a vowel may appear as a
diacritical mark in the Maḏnḥāyā and Serṭā script styles. The standard
term for these marks is vowel **points**.

**Kashida** (or **tatweel**) is the term for a glyph inserted into a
sequence for the purpose of elongating the baseline stroke of a
letter. Unicode documents use the term "tatweel" most frequently,
while OpenType documents use the term "kashida" most
frequently. Kashidas are typically inserted in order to justify lines
of text. 

**Majlīyānā** is the name for the diacritical mark that is attached to
a native Syriac letter in order to change it to a foreign loan letter.

**Syāmē** is the name for the diacritical mark that is used to
indicate the pluralization of a word.

The **Syriac Abbreviation Mark** is a Unicode control character used
to trigger the addition of an overline glyph that may span the length
of multiple letters. The Syriac Abbreviation Mark is often used to
denote the elision of letters from a word; it can also be used to
denote that a sequence of letters represents a number rather than a
word.


## Glyph classification ##

Because Syriac is a joining (or cursive) script, proper shaping of
text runs involves identifying the joining behavior of each character,
then combining that information with any preceding or subsequent
characters to determine the contextually correct form for display.

### Joining properties ###

Syriac characters are assigned a `JOINING_TYPE` property in the
Unicode standard that indicates how they join to adjacent
characters. There are six possible values: 

  - `JOINING_TYPE_LEFT` indicates that a character joins with
    the subsequent character, but does not join with the preceding
    character. 
	
  - `JOINING_TYPE_RIGHT` indicates that a character joins with the
    preceding character, but does not join with the subsequent character.	

  - `JOINING_TYPE_DUAL` indicates that a character joins with the
    preceding character and joins with the subsequent character.
	
  - `JOINING_TYPE_NON_JOINING` indicates that a character does not
    join with the preceding or with the subsequent character.
	
  - `JOINING_TYPE_TRANSPARENT` indicates that the character does not
    join with adjacent characters _and_ that the character must be
    skipped over when the shaping engine is evaluating the joining
    positions in a sequence of characters. When a
    `JOINING_TYPE_TRANSPARENT` character is encountered in a sequence,
    the `JOINING_TYPE` of the preceding character passes
    through. Diacritical marks are frequently assigned this value. 
	
  - `JOINING_TYPE_JOIN_CAUSING` indicates that the character forces
    the use of joining forms with the preceding and subsequent
    characters. Kashidas and the Zero Width Joiner (`U+200D`) are both
    `JOIN_CAUSING` characters.
  

Syriac letters are also assigned to a `JOINING_GROUP` that indicates
which fundamental character they behave like with regard to joining
behavior. Each of the basic letters in the Syriac block tends to
belong to its own `JOINING_GROUP`, while extended letters are often
assigned to the `JOINING_GROUP` that corresponds to the character's
base letter. 

For example, the letter "Persian Bheth" is rendered as the base Syriac
"Beth" with an additional stroke at the top. Therefore, it is assigned
to the `BETH` joining group.

In addition to the standard joining types, `<syrc>` text features two
`JOINING_GROUP`s that trigger special behavior: `ALAPH` and
`DALATH_RISH`.

The `fin2`, `fin3`, and `med2` <abbr title="Glyph Substitution table">GSUB</abbr> features implement Syriac-specific
shaping rules that affect glyphs in the `ALAPH` joining group, based
on the preceding glyph.

  - `fin2` and `fin3` substitute special terminal forms of `ALAPH`
    glyphs, depending on whether or not the preceding character
    belongs to the `DALATH_RISH` joining group.
  - `med2` substitutes special medial forms of `ALAPH` glyphs,
    depending on whether or not the preceding character is
    left-joining (that is, having a `JOINING_TYPE` of `DUAL`, `LEFT`,
    or `JOIN_CAUSING`).

The `DALATH_RISH` joining group includes the standard letters "Dalath"
and "Rish" as well as the "Dotless Dalath-Rish", an ambiguous letter
that is used in Old Syriac text, when neither the "Dalath" nor the "Rish"
letter featured a dot, and may also be used in transcribing
historical documents where it is impossible to distinguish whether the
letter in the source text is "Dalath" or "Rish".

:::{figure-md}
![Dalath, Rish, Dotless Dalath-Rish](/images/syriac/syriac-dalath-rish.svg "Dalath, Rish, Dotless Dalath-Rish"){.shaping-demo .inline-svg .greyscale-svg #syriac-dalath-rish}

Dalath, Rish, Dotless Dalath-Rish
:::

```{svg-color-toggle-button} syriac-dalath-rish
```


Shaping engines may choose to define pseudo-`JOINING_TYPE`s
corresponding to the `ALAPH` and `DALATH_RISH` joining groups, or may
track the appropriate `JOINING_GROUP` properties by any other means
preferred.


### Mark classification ###

The Unicode standard defines a _canonical combining class_ for each
codepoint that is used whenever a sequence needs to be sorted into
canonical order. 

Several of the Syriac marks belong to standard combining
classes:

:::{table} Mark-classification table

| Codepoint | Combining class | Glyph                              |
|:----------|:----------------|:-----------------------------------|
|`U+0711`   | 36              | &#x0711; Superscript Alaph         |
|           | 220             | Other below-base combining marks   |
|           | 230             | Other above-base combining marks   |
:::


The numeric values of these combining classes are used during Unicode
normalization.


These classifications are used in the [mark-transient-reordering
stage](#stage-1-transient-reordering-of-modifier-combining-marks).

			
### Character tables ###

Separate character tables are provided for the Syriac and Syriac
Supplement Unicode blocks, as well as for other miscellaneous
characters that are used in `<syrc>` text runs:

  - [Syriac character table](character-tables/character-tables-syriac.md#syriac-character-table)
  - [Syriac Supplement character table](character-tables/character-tables-syriac.md#syriac-supplement-character-table)
  - [Miscellaneous character table](character-tables/character-tables-syriac.md#miscellaneous-character-table)


The tables list each codepoint along with its Unicode general
category and its joining type. For letters, the table lists the
codepoint's joining group. For diacritical marks, the table lists the
codepoint's mark combining class. The codepoint's Unicode name and an example
glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Joining type | Joining group | Mark class | Glyph                        |
|:----------|:-----------------|:-------------|:--------------|:-----------|:-----------------------------|
|`U+0712`   | Letter           | DUAL         | BETH          | _null_     | &#x0712; Beth                |
| | | | | |
|`U+0737`   | Mark [Mn]        | TRANSPARENT  | _null_        | 220        | &#x0737; Rbasa Below         |
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of Syriac text include the dotted-circle placeholder (`U+25CC`), the
combining grapheme joiner (`U+034F`), the zero-width joiner (`U+200D`)
and zero-width non-joiner (`U+200C`), the left-to-right text marker
(`U+200E`) and right-to-left text marker (`U+200F`), and the no-break
space (`U+00A0`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
combining mark in isolation. Real-world text syllables may also use
other characters, such as hyphens or dashes, in a similar placeholder
fashion; shaping engines should cope with this situation gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (which is not common in Syriac but can occur in
    other scripts)


In addition, Syriac text runs may include the "tatweel" or kashida
(`U+0640`) and "shadda" (`U+0651`) codepoints from the Arabic block,
because the Syriac block does not encode a separate kashida or shadda
character. 

Modern texts may also make use of Arabic punctuation marks, and texts
using Syriac to write Arabic (called "Garshuni") may also employ
Arabic ḥarakah (vowel) marks.

The combining grapheme joiner (<abbr>CGJ</abbr>) is primarily used to alter the
order in which adjacent marks are positioned during the
mark-reordering stage, in order to adhere to the needs of a
non-default language orthography.

By default, OpenType shaping reorders sequences of adjacent marks by
sorting the sequence on the marks' Canonical_Combining_Class (<abbr>Ccc</abbr>)
values. The presence of a <abbr title="Combining Grapheme Joiner">CGJ</abbr> character within a sequence of marks has
the effect of splitting the sequence into two sequences of marks and,
therefore, halting any mark-reordering that would have occurred
between the marks on either side of the <abbr title="Combining Grapheme Joiner">CGJ</abbr>.

The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to force the usage of the
cursive connecting form of a letter even when the context of the
adjoining letters would not trigger the connecting form. 

For example, to show the initial form of a letter in isolation (such
as for displaying it in a table of forms), the sequence <samp>"_Letter_,ZWJ"</samp>
would be used. To show the medial form of a letter in isolation, the
sequence <samp>"ZWJ,_Letter_,ZWJ"</samp> would be used.

The zero-width non-joiner (<abbr>ZWNJ</abbr>) is primarily used to prevent a
cursive connection between two adjacent characters that would, under
normal circumstances, form a join. 

The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.


The right-to-left mark (<abbr>RLM</abbr>) and left-to-right mark (<abbr>LRM</abbr>) are used by
the Unicode bidirectionality algorithm (BiDi) to indicate the points
in a text run at which the writing direction changes. Generally
speaking, <abbr title="Right-to-Left Mark">RLM</abbr> and <abbr title="Left-to-Right Mark">LRM</abbr> codepoints do not interact with shaping.

The no-break space is primarily used to display those codepoints that
are defined as non-spacing (such as vowel points or diacritical marks) in an
isolated context, as an alternative to displaying them superimposed on
the dotted-circle placeholder.


## The `<syrc>` shaping model ##

Processing a run of `<syrc>` text involves seven top-level stages:

1. Transient reordering of modifier combining marks
2. Compound character composition and decomposition
3. Computing letter joining states
4. Applying the `stch` feature
5. Applying the language-form substitution features from <abbr>GSUB</abbr>
6. Applying the typographic-form substitution features from <abbr>GSUB</abbr>
7. Applying the positioning features from <abbr>GPOS</abbr>


### Stage 1: Transient reordering of modifier combining marks ###

<!--- http://www.unicode.org/reports/tr53/tr53-1.pdf --->

> Note: The following algorithm contains steps specific to reordering
> Arabic marks. Since Garshuni text, which uses the Syriac script to
> write the Arabic language, employs Arabic marks, shaping engines
> should not omit the mark-reordering logic. 

Sequences of adjacent marks must be reordered so that they appear in
the appropriate visual order before the mark-to-base and mark-to-mark
positioning features from <abbr title="Glyph Positioning table">GPOS</abbr> can be correctly applied.

In particular, those marks that have strong affinity to the base
character must be placed closest to the base.

This mark-reordering operation is distinct from the standard,
cross-script mark-reordering performed during Unicode
normalization. The standard Unicode mark-reordering algorithm is based
on comparing the _Canonical_Combining_Class_ (<abbr>Ccc</abbr>) properties of mark
codepoints, whereas this script-specific reordering utilizes the
_Modifier_Combining_Mark_ (<abbr>MCM</abbr>) subclasses specified in the
character tables.

The algorithm for reordering a sequence of marks is:

  - First, move any <samp>"Shadda"</samp> (combining class `33`) characters to the
    beginning of the mark sequence.
	
  -	Second, move any subsequence of combining-class-`230` characters that begins
       with a `230_MCM` character to the beginning of the sequence,
       before all <samp>"Shadda"</samp> characters. The subsequence must be moved
       as a group.

  - Finally, move any subsequence of combining-class-`220` characters that begins
       with a `220_MCM` character to the beginning of the sequence,
       before all <samp>"Shadda"</samp> characters and before all class-`230`
       characters. The subsequence must be moved as a group.

> Note: Unicode describes this mark-reordering operation, the Arabic
> Mark Transient Reordering Algorithm (<abbr>AMTRA</abbr>), in Technical Report 53,
> which describes it in terms that are distinct from standard,
> <abbr>Ccc</abbr>-based mark reordering.
>
> Specifically, <abbr title="Arabic Mark Transient Reordering Algorithm">AMTRA</abbr> is designated as an operation performed during
> text rendering only, which therefore does not impact other
> Unicode-compliance issues such as allowable input sequences or text
> encoding.
>
> However, shaping engines may choose to perform the reordering of
> modifier combining marks in conjunction with their Unicode
> normalization functionality for increased efficiency.

### Stage 2: Compound character composition and decomposition ###

The `ccmp` feature allows a font to substitute

 - mark-and-base sequences with a pre-composed glyph including both
    the mark and the base (as is done with a ligature substitution)
	
  - individual compound glyphs with the equivalent sequence of
    decomposed glyphs (such as decomposing a letter with Majlīyānā or
    other marks into a separate fundamental-letter glyph followed by a
    mark-only glyph, to permit more precise positioning)
 
If present, these composition and decomposition substitutions must be
performed before applying any other <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups, because
those lookups may be written to match only the `ccmp`-substituted
glyphs. 

:::{figure-md}
![`ccmp` feature application](/images/syriac/syriac-ccmp.svg "`ccmp` feature application"){.shaping-demo .inline-svg .greyscale-svg #syriac-ccmp}

`ccmp` feature application
:::

```{svg-color-toggle-button} syriac-ccmp
```


### Stage 3: Computing letter joining states ###

In order to correctly apply the initial, medial, and final form
substitutions from <abbr title="Glyph Substitution table">GSUB</abbr> during stage 5, the shaping engine must
tag every letter for possible application of the appropriate feature.

To determine which feature is appropriate, the shaping engine must
examine each word in turn and compute each letter's joining state from
the letter's `JOINING_TYPE` and the `JOINING_TYPE` of the
preceding character (if any).

> Note: Although Syriac uses inter-word spaces, the `init` feature
> does _not_ refer to word-initial letters only and the `fina` feature
> does _not_ refer to word-final letters only.
>
> Rather, both of these terms are defined with respect to whether or
> not the preceding and subsequent letters form joins with the current
> letter. The letters at word boundaries will, naturally, take on
> initial and final forms, but initial and final forms of letters also
> occur regularly within words, when the letter in question is
> adjacent to a letter that does not form joins.

This computation starts from the first letter of the word, temporarily
tagging the letter for `isol` substitution. If the first
letter is the only letter in the word, the `isol` tag will remain unchanged.

From here, the algorithm consumes each character in the string, one at
a time, keeping track of the JOINING_TYPE of the previous character. 

If the current character is JOINING_TYPE_TRANSPARENT, move on to the next
character but preserve the currently-tracked JOINING_TYPE at its previous state.

If the preceding character's JOINING_TYPE is LEFT, DUAL, or
JOIN_CAUSING:
  - In `<syrc>` text, if the current character is <samp>"Alaph"</samp>, tag the
    current character for `med2`, then update the tag for the
    preceding character:
	  - `isol` becomes `init`
	  - `fina` becomes `medi`
	  - `init` remains `init`
	  - `medi` remains `medi`
  - If the current character's JOINING_TYPE is RIGHT, DUAL, or
    JOIN_CAUSING, tag the current character for `fina`, then update
    the tag for the preceding character:
	  - `isol` becomes `init`
	  - `fina` becomes `medi`
	  - `init` remains `init`
	  - `medi` remains `medi`

Otherwise, tag the current character for `isol`.

After testing the final character of the word, if the text is in `<syrc>` and
if the last character that is not JOINING_TYPE_TRANSPARENT or
JOINING_TYPE_NON_JOINING is <samp>"Alaph"</samp>, perform an additional test:
  - If the preceding character is JOINING_TYPE_LEFT, tag the current character
    for `fina`
  - If the preceding character's JOINING_GROUP is DALATH_RISH, tag the current
    character for `fin3`
  - Otherwise, tag the current character for `fin2`


Once the last character of the word has been processed, proceed to the
next word and repeat the algorithm, starting at the beginning of the
next word.

> Note: Because the processing of the characters in the algorithm
> described above is deterministic, shaping engines may choose to
> implement the joining-state computation as a state machine, in a lookup
> table, or by any other means desirable.

At the end of this process, all letters should be tagged for possible
substitution by one of the `isol`, `init`, `medi`, `med2`, `fina`, `fin2`, or
`fin3` features.

### Stage 4: Applying the `stch` feature ###

The `stch` feature decomposes and stretches special marks that are
meant to extend to the full width of words to which they are
attached. It was defined for use in `<syrc>` text runs for the <samp>"Syriac
Abbreviation Mark"</samp> (`U+070F`) but it can be used with similar marks in
other scripts.

To apply the `stch` feature, the shaping engine should first decompose the
`U+070F` glyph into components, which results in beginning-point,
midpoint, and endpoint glyphs plus two (or more) extension glyphs: at
least one extension between the beginning and midpoint glyphs and at
least one extension between the midpoint and endpoint glyphs. 

The shaping engine must then calculate the total length of the word to
which the mark applies. That length, minus the advance widths of the
beginning, middle, and endpoint glyphs of the mark, must be divided by
two. 

The result, divided by the advance width of the extension glyph
and rounded up to the next integer, tells the shaping engine how many
copies of the extension glyph must be placed between the midpoint and
each end of the mark.

Following this procedure ensures that the same number of extensions is
used on each side of the mark so that it remains symmetrical.

Finally, the decomposed mark must be reordered as follows: 

  - All of the glyphs in the sequence for the mark, _except_ for
    the final glyph, are repositioned as a group so that they precede
    the word to which the mark is attached.
  - The final glyph in the mark sequence is repositioned to the end of
    the word.

:::{figure-md}
![Application of Syriac Abbreviation Mark stretching feature](/images/syriac/syriac-stch.svg "Application of Syriac Abbreviation Mark stretching feature"){.shaping-demo .inline-svg .greyscale-svg #syriac-stch}

Application of Syriac Abbreviation Mark stretching feature
:::

```{svg-color-toggle-button} syriac-stch
```


### Stage 5: Applying the language-form substitution features from <abbr>GSUB</abbr> ###

The language-substitution phase applies mandatory substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for
this stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

The order in which these substitutions must be performed is fixed for
all scripts implemented in the Arabic shaping model:

	locl
	isol
	fina
	fin2
	fin3
	medi
	med2
	init
	rlig
	rclt (not used in Syriac)
	calt
	

#### Stage 5, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

<!--- ![Localized form substitution](/images/syriac/syriac-locl.svg) --->


#### Stage 5, step 2: isol ####

The `isol` feature substitutes the default glyph for a codepoint with
the isolated form of the letter.

> Note: It is common for a font to use the isolated form of a letter
> as the default, in which case the `isol` feature would apply no
> substitutions. However, this is only a convention, and the active
> font may use other forms as the default glyphs for any or all
> codepoints.

<!--- ![Isolated form substitution](/images/syriac/syriac-isol.svg) --->


#### Stage 5, step 3: fina ####

The `fina` feature substitutes the default glyph for a codepoint with
the terminal (or final) form of the letter.

:::{figure-md}
![Final form substitution](/images/syriac/syriac-fina.svg "Final form substitution"){.shaping-demo .inline-svg .greyscale-svg #syriac-fina}

Final form substitution
:::

```{svg-color-toggle-button} syriac-fina
```


#### Stage 5, step 4: fin2 ####

The `fin2` feature replaces word-final Alaph glyphs that are not
preceded by Dalath, Rish, or dotless Dalath-Rish with a special
terminal-form Alaph glyph.

:::{figure-md}
![Final form-2 substitution](/images/syriac/syriac-fin2.svg "Final form-2 substitution"){.shaping-demo .inline-svg .greyscale-svg #syriac-fin2}

Final form-2 substitution
:::

```{svg-color-toggle-button} syriac-fin2
```


#### Stage 5, step 5: fin3 ####

The `fin3` feature replaces word-final Alaph glyphs that are
preceded by Dalath, Rish, or dotless Dalath-Rish with a special
terminal-form Alaph glyph.

:::{figure-md}
![Final form-3 substitution](/images/syriac/syriac-fin3.svg "Final form-3 substitution"){.shaping-demo .inline-svg .greyscale-svg #syriac-fin3}

Final form-3 substitution
:::

```{svg-color-toggle-button} syriac-fin3
```


#### Stage 5, step 6: medi ####

The `medi` feature substitutes the default glyph for a codepoint with
the medial form of the letter.

:::{figure-md}
![Medial form substitution](/images/syriac/syriac-medi.svg "Medial form substitution"){.shaping-demo .inline-svg .greyscale-svg #syriac-medi}

Medial form substitution
:::

```{svg-color-toggle-button} syriac-medi
```


#### Stage 5, step 7: med2 ####

The `med2` feature substitutes a special medial-form Alaph glyph for
any Alaph glyph in the middle of a word that is preceded by a base
character which can form a right-side join.

:::{figure-md}
![Medial form-2 substitution](/images/syriac/syriac-med2.svg "Medial form-2 substitution"){.shaping-demo .inline-svg .greyscale-svg #syriac-med2}

Medial form-2 substitution
:::

```{svg-color-toggle-button} syriac-med2
```


#### Stage 5, step 8: init ####

The `init` feature substitutes the default glyph for a codepoint with
the initial form of the letter.

:::{figure-md}
![Initial form substitution](/images/syriac/syriac-init.svg "Initial form substitution"){.shaping-demo .inline-svg .greyscale-svg #syriac-init}

Initial form substitution
:::

```{svg-color-toggle-button} syriac-init
```


#### Stage 5, step 9: rlig ####

The `rlig` feature substitutes glyph sequences with mandatory
ligatures. Substitutions made by `rlig` cannot be disabled by
application-level user interfaces.

:::{figure-md}
![Required ligature substitution](/images/syriac/syriac-rlig.svg "Required ligature substitution"){.shaping-demo .inline-svg .greyscale-svg #syriac-rlig}

Required ligature substitution
:::

```{svg-color-toggle-button} syriac-rlig
```


#### Stage 5, step 10: rclt ####

This feature is not used in `<syrc>` text.


#### Stage 5, step 11: calt ####

The `calt` feature substitutes glyphs with contextual alternate
forms. In general, this involves replacing the default form of a
connecting glyph with an alternate that provides a preferable
connection to an adjacent glyph.

The substitutions made by `calt`
can be disabled by application-level user interfaces.

:::{figure-md}
![Contextual alternate substitution](/images/syriac/syriac-calt.svg "Contextual alternate substitution"){.shaping-demo .inline-svg .greyscale-svg #syriac-calt}

Contextual alternate substitution
:::

```{svg-color-toggle-button} syriac-calt
```


### Stage 6: Applying the typographic-form substitution features from <abbr>GSUB</abbr> ###

The typographic-substitution phase applies optional substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table.

The order in which these substitutions must be performed is fixed for
all scripts implemented in the Arabic shaping model:

    liga
	dlig
	cswh (not used in Syriac)
	mset (not used in Syriac)
	

#### Stage 6, step 1: liga ####

The `liga` feature substitutes standard, optional ligatures that are on
by default. Substitutions made by `liga` may be disabled by
application-level user interfaces.

:::{figure-md}
![Standard ligature substitution](/images/syriac/syriac-liga.svg "Standard ligature substitution"){.shaping-demo .inline-svg .greyscale-svg #syriac-liga}

Standard ligature substitution
:::

```{svg-color-toggle-button} syriac-liga
```


#### Stage 6, step 2: dlig ####

The `dlig` feature substitutes additional optional ligatures that are
off by default. Substitutions made by `dlig` may be enabled or
disabled by application-level user interfaces.

:::{figure-md}
![Discretionary ligature substitution](/images/syriac/syriac-dlig.svg "Discretionary ligature substitution"){.shaping-demo .inline-svg .greyscale-svg #syriac-dlig}

Discretionary ligature substitution
:::

```{svg-color-toggle-button} syriac-dlig
```


#### Stage 6, step 3: cswh ####

This feature is not used in `<syrc>` text.


#### Stage 6, step 4: mset ####

This feature is not used in `<syrc>` text.


### Stage 7: Applying the positioning features from <abbr>GPOS</abbr> ###

The positioning stage adjusts the positions of mark and base
glyphs.

The order in which these features are applied is fixed for
all scripts implemented in the Arabic shaping model:

    curs (not used in Syriac)
	kern
	mark
	mkmk

#### Stage 7, step 1: curs ####

This feature is not used in `<syrc>` text.


#### Stage 7, step 2: kern ####

The `kern` feature adjusts glyph spacing between pairs of adjacent glyphs.

:::{figure-md}
![Kerning positioning](/images/syriac/syriac-kern.svg "Kerning positioning"){.shaping-demo .inline-svg .greyscale-svg #syriac-kern}

Kerning positioning
:::

```{svg-color-toggle-button} syriac-kern
```


#### Stage 7, step 3: mark ####

The `mark` feature positions marks with respect to base glyphs.

:::{figure-md}
![Mark positioning](/images/syriac/syriac-mark.svg "Mark positioning"){.shaping-demo .inline-svg .greyscale-svg #syriac-mark}

Mark positioning
:::

```{svg-color-toggle-button} syriac-mark
```


#### Stage 7, step 4: mkmk ####

The `mkmk` feature positions marks with respect to preceding marks,
providing proper positioning for sequences of marks that attach to the
same base glyph.

:::{figure-md}
![Mark-to-mark positioning](/images/syriac/syriac-mkmk.svg "Mark-to-mark positioning"){.shaping-demo .inline-svg .greyscale-svg #syriac-mkmk}

Mark-to-mark positioning
:::

```{svg-color-toggle-button} syriac-mkmk
```


================================================
FILE: opentype-shaping-tamil.md
================================================
```{include} /_global.md
```

# Tamil shaping in OpenType #

This document details the shaping procedure needed to display text
runs in the Tamil script.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes and subclasses](#shaping-classes-and-subclasses)
      - [Tamil character tables](#tamil-character-tables)
  - [The `<tml2>` shaping model](#the-tml2-shaping-model)
      - [Stage 1: Identifying syllables and other sequences](#stage-1-identifying-syllables-and-other-sequences)
      - [Stage 2: Initial reordering](#stage-2-initial-reordering)
      - [Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr>](#stage-3-applying-the-basic-substitution-features-from-gsub)
      - [Stage 4: Final reordering](#stage-4-final-reordering)
      - [Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr>](#stage-5-applying-all-remaining-substitution-features-from-gsub)
      - [Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr>](#stage-6-applying-remaining-positioning-features-from-gpos)
  - [The `<taml>` shaping model](#the-taml-shaping-model)
      - [Distinctions from `<tml2>`](#distinctions-from-tml2)
      - [Advice for handling fonts with `<taml>` features only](#advice-for-handling-fonts-with-taml-features-only)
      - [Advice for handling text runs composed in `<taml>` format](#advice-for-handling-text-runs-composed-in-taml-format)


## General information ##

The Tamil script belongs to the Indic family, and follows
the same general patterns as the other Indic scripts. More
specifically, it belongs to the South Indic subgroup.

The Tamil script is used to write multiple languages, most commonly
Tamil, Irula, and Saurashtra. In addition, Sanskrit may be written
in Tamil, so Tamil script runs may include glyphs from the Vedic
Extensions block of Unicode. 

There are two extant Tamil script tags defined in OpenType, `<taml>`
and `<tml2>`. The older script tag, `<taml>`, was deprecated in 2005.
Therefore, new fonts should be engineered to work with the `<tml2>`
shaping model. However, if a font is encountered that supports only
`<taml>`, the shaping engine should deal with it gracefully.

## Terminology ##

OpenType shaping uses a standard set of terms for Indic scripts.  The
terms used colloquially in any particular language may vary, however,
potentially causing confusion.

**Matra** is the standard term for a dependent vowel sign. 

**Halant** and **Virama** are both standard terms for the above-base "vowel-killer"
sign. Unicode documents use the term "virama" most frequently, while
OpenType documents use the term "halant" most frequently. In the Tamil
language, this sign is known as _pulli_.

**Chandrabindu** (or simply **Bindu**) is the standard term for the diacritical mark
indicating that the preceding vowel should be nasalized. Tamil does
not include a "chandrabindu" character, but the term is still found in
multiple places in OpenType shaping documents.

The term **base consonant** is also critical to Indic shaping. The
base consonant of a syllable is the consonant that carries the
syllable's vowel sound, either the inherent vowel (for an unmarked
base consonant) or a dependent vowel (with the addition of a matra).

A syllable's base consonant is generally rendered in its full form
(although it may form ligatures), while other consonants in the
syllable frequently take on secondary forms. Different <abbr title="Glyph Substitution table">GSUB</abbr>
substitutions may apply to a script's **pre-base** and **post-base**
consonants. Some of these substitutions create **above-base** or
**below-base** forms. The **Reph** form of the consonant "Ra" is an
example.

Syllables may also begin with an **independent vowel** instead of a
consonant. In these syllables, the independent vowel is rendered in
full-letter form, not as a matra, and the independent vowel serves as the
syllable base, similar to a base consonant.

Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.

## Glyph classification ##

Shaping Tamil text depends on the shaping engine correctly
classifying each glyph in the run. As with most other scripts, the
classifications must distinguish between consonants, vowels
(independent and dependent), numerals, punctuation, and various types
of diacritical mark.

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not sufficient to fully capture the
expected shaping behavior (such as glyph reordering). Therefore,
Tamil glyphs must additionally be classified by how they are treated
when shaping a run of text.

### Shaping classes and subclasses ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Indic scripts. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Several of the diacritic and syllable-modifying marks behave according
to their own rules and, thus, have a special class. These include
`BINDU`, `VISARGA`, `AVAGRAHA`, `NUKTA`, and `VIRAMA`. Some
less-common marks behave according to rules that are similar to these
common marks, and are therefore classified with the corresponding
common mark. The Vedic Extensions also include a `CANTILLATION`
class for tone marks.

Letters generally fall into the classes `CONSONANT`,
`VOWEL_INDEPENDENT`, and `VOWEL_DEPENDENT`. These classes help the
shaping engine parse and identify key positions in a syllable. For
example, Unicode categorizes dependent vowels as `Mark [Mn]`, but the
shaping engine must be able to distinguish between dependent vowels
and diacritical marks (which are also categorized as `Mark [Mn]`).

Tamil includes one special class of letter, `MODIFYING_LETTER`, which
is used only for "Visarga" (`U+0B83`). This denotes the character's
usage in the Tamil language, which treats "Visarga" differently than
other Indic scripts. In older Tamil texts, "Visarga" may indicate the
presence of a silent letter; in recent Tamil texts, "Visarga" is used
to modify the following letter in order to denote a foreign phoneme,
such as "f". In shaping, "Visarga" should match tests for letters, but
it is neither a consonant nor a vowel.

Other characters, such as symbols and miscellaneous letters (for
example, letter-like symbols that only occur as standalone entities
and do not occur within syllables), need no special attention from the
shaping engine, so they are not assigned a shaping class.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType features that
might affect how the respective glyphs are drawn, such as `tnum`,
which specifies the usage of tabular-width numerals, and `sups`, which
replaces the default glyphs with superscript variants.

Marks and dependent vowels are further labeled with a mark-placement
subclass, which indicates where the glyph will be placed with respect
to the base character to which it is attached. The actual position of
the glyphs is determined by the lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr>
table; however, the shaping rules for Indic scripts require that the
shaping engine be able to identify marks by their general
position. 

For example, left-side dependent vowels (matras), classified
with `LEFT_POSITION`, must frequently be reordered, with the final
position determined by whether or not other letters in the syllable
have formed ligatures or combined into conjunct forms. Therefore, the
`LEFT_POSITION` subclass of the character must be tracked throughout
the shaping process.

There are four basic _mark-placement subclasses_ for dependent vowels
(matras). Each corresponds to the visual position of the matra with
respect to the syllable base to which it is attached:

  - `LEFT_POSITION` matras are positioned to the left of the syllable base.
  - `RIGHT_POSITION` matras are positioned to the right of the syllable base.
  - `TOP_POSITION` matras are positioned above the syllable base.
  - `BOTTOM_POSITION` matras are positioned below the syllable base.
  
These positions may also be referred to elsewhere in shaping documents as:

  - _Pre-base_ matras
  - _Post-base_ matras
  - _Above-base_ matras
  - _Below-base_ matras
  
respectively. The `LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

In addition, dependent-vowel codepoints that are composed of multiple
components will be designated in character tables as having a compound
_mark-placement subclass_, such as `TOP_AND_RIGHT` or `LEFT_AND_RIGHT`. 

However, these multi-part matras are decomposed into separate matra
components during the shaping process. After the decomposition, each
matra component will belong to exactly one of the four basic
_mark-placement subclasses_.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there are some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 

### Tamil character tables ###

Separate character tables are provided for the Tamil, Tamil
Supplement, Grantha marks, and Vedic Extensions blocks, as well as for
other miscellaneous characters that are used in `<tml2>` text runs:

  - [Tamil character table](character-tables/character-tables-tamil.md#tamil-character-table)
  - [Tamil Supplement character table](character-tables/character-tables-tamil.md#tamil-supplement-character-table)
  - [Grantha marks character table](character-tables/character-tables-tamil.md#grantha-marks-character-table)
  - [Vedic Extensions character table](character-tables/character-tables-tamil.md#vedic-extensions-character-table)
  - [Miscellaneous character table](character-tables/character-tables-tamil.md#miscellaneous-character-table)

The tables list each codepoint along with its Unicode general
category, its shaping class, and its mark-placement subclass. The
codepoint's Unicode name and an example glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0B82`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0B82; Anusvara            |
| | | | |
|`U+0B95`   | Letter           | CONSONANT         | _null_                     | &#x0B95; Ka                  |
:::


Codepoints with no assigned meaning are designated as _unassigned_ in
the _Unicode category_ column.

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. 

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the tables use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.

In addition to the marks in the Tamil Unicode block, Tamil text can
also include several diacritical marks from the Grantha Unicode block,
such as Grantha Candrabindu (`U+11301`), Grantha Visarga (`U+11303`),
and Grantha Nukta (`U+1133C`).


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of Tamil text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (such as <samp>"Reph"</samp>)


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a <samp>"_Consonant_,Halant,_Consonant_"</samp> sequence.

  - The sequence <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> blocks the
    formation of a conjunct between the two consonants. 

Note, however, that the <samp>"_Consonant_,Halant"</samp> subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead.

  - The sequence <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> should produce
    the first consonant in its standard form, followed by an explicit
    <samp>"Halant"</samp>. 

A secondary usage of the zero-width joiner is to prevent the formation of
<samp>"Reph"</samp>.

  - An initial <samp>"Ra,Halant,ZWJ"</samp> sequence should not produce a <samp>"Reph"</samp>,
    even where an initial <samp>"Ra,Halant"</samp> sequence without the zero-width
    joiner would otherwise produce a <samp>"Reph"</samp>.

The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.

For example:

  - A lookup that substitutes an alternate version of a
    dependent-vowel (matra) glyph when it is preceded by <samp>"Ka,Halant,Tta"</samp>
    should still be applied if the dependent-vowel codepoint is preceded
    by <samp>"Ka,Halant,ZWJ,Tta"</samp> in the text run.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match <samp>"NBSP,ZWJ,Halant,_Consonant_"</samp>, <samp>"NBSP,_mark_"</samp>, or <samp>"NBSP,_matra_"</samp>.

Tamil text sometimes uses the Latin numerals 2, 3, and 4 in
superscript or subscript positions to annotate Sanskrit. When used in
this fashion, the superscripts and subscripts are treated as
`SYLLABLE_MODIFIER` signs for shaping purposes.


## The `<tml2>` shaping model ##

Processing a run of `<tml2>` text involves six top-level stages:

1. Identifying syllables and other sequences
2. Initial reordering
3. Applying the basic substitution features from <abbr>GSUB</abbr>
4. Final reordering
5. Applying all remaining substitution features from <abbr>GSUB</abbr>
6. Applying all remaining positioning features from <abbr>GPOS</abbr>


As with other Indic scripts, the initial reordering stage and the
final reordering stage each involve applying a set of several
script-specific rules. The basic substitution features must be applied
to the run in a specific order. The remaining substitution features in
stage five, however, do not have a mandatory order.

Indic scripts follow many of the same shaping patterns, but they
differ in a few critical characteristics that the shaping engine must
track. These include:

  - The position of the base consonant in a syllable.
  
  - The final position of <samp>"Reph"</samp>.
  
  - Whether <samp>"Reph"</samp> must be requested explicitly or if it is formed by
    a specific, implicit sequence.
	
  - Whether the below-base forms feature is applied only to consonants
    before the syllable base, only to consonants after the base
    consonant, or to both.
	
  - The ordering positions for dependent vowels
    (matras). Specifically, right-side, above-base, and below-base
    matras follow different rules in different scripts. 
	All Indic scripts position left-side matras in the same
    manner, in the ordering position `POS_PREBASE_MATRA`. 

With regard to these common variations, Tamil's specific shaping
characteristics include: 

  - `BASE_POS_LAST` = The base consonant of a syllable is the last
     consonant, not counting any special final-consonant forms.

  - `REPH_POS_AFTER_POST` = <samp>"Reph"</samp> is ordered after all post-base consonant forms.

  - `REPH_MODE_IMPLICIT` = <samp>"Reph"</samp> is formed by an initial <samp>"Ra,Halant"</samp> sequence.

  - `BLWF_MODE_PRE_AND_POST` = The below-forms feature is applied both to
     pre-base consonants and to post-base consonants.

  - `MATRA_POS_TOP` = `POS_AFTER_SUBJOINED` = Above-base matras are
     ordered after subjoined (i.e., below-base) consonant forms. 

  - `MATRA_POS_RIGHT` = `POS_AFTER_POST` = Right-side matras are
     ordered after all post-base consonant forms. 

  - `MATRA_POS_BOTTOM` = `POS_AFTER_POST` = Below-base matras are
     ordered after all post-base consonant forms.

These characteristics determine how the shaping engine must reorder
certain glyphs, how base consonants are determined, and how <samp>"Reph"</samp>
should be encoded within a run of text.

### Stage 1: Identifying syllables and other sequences ###

A syllable in Tamil consists of a valid orthographic sequence
that may be followed by a "tail" of modifier signs. 

> Note: The Tamil Unicode block enumerates one modifier sign,
> "Anusvara" (`U+0B82`). Tamil text can also include several modifier
> signs from the Grantha Unicode block, such as Grantha Candrabindu
> (`U+11301`), Grantha Visarga (`U+11303`), and Grantha Nukta
> (`U+1133C`). In addition, Sanskrit text written in Tamil
> may include additional signs from the Vedic Extensions block.
>
> Note: Unlike many other Indic scripts, the Tamil Unicode block
> categorizes "Visarga" (`U+0B83`) as a letter, not as a modifier sign.


Each syllable contains exactly one vowel sound. Valid syllables may
begin with either a consonant or an independent vowel. 

If the syllable begins with a consonant, then the consonant that
provides the vowel sound is referred to as the "base" consonant. If
the syllable begins with an independent vowel, that independent vowel
is the syllable's only vowel sound and serves as the "base". 

> Note: A consonant that is not accompanied by a dependent vowel (matra) sign
> carries the script's inherent vowel sound. This vowel sound is changed
> by a dependent vowel (matra) sign following the consonant.

From the shaping engine's perspective, the main distinction between a
syllable with a base consonant and a syllable with an
independent-vowel base is that a syllable with an independent-vowel
base is less likely to include additional consonants in special forms
and less likely to include dependent vowel signs
(matras). Therefore, in the common case, vowel-based syllables may
involve less reordering, substitution feature applications, and other
processing than consonant-based syllables.

In some languages and orthographies, vowel-based syllables are
not permitted to include additional consonants or matras, and certain
<abbr title="Glyph Substitution table">GSUB</abbr> substitution features do not occur. However, there are often
known exceptions, and real-world text makes no such guarantees. 

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

Generally speaking, the base consonant is the final consonant of the
syllable and its vowel sound designates the end of the syllable. This
rule is synonymous with the `BASE_POS_LAST` characteristic mentioned
earlier. 

Valid consonant-based syllables may include one or more additional 
consonants that precede the base consonant. Each of these
other, pre-base consonants will be followed by the <samp>"Halant"</samp> mark, which
indicates that they carry no vowel. They affect pronunciation by
combining with the base consonant (e.g., "_str_", "_pl_") but they
do not add a vowel sound.

Unlike many other Indic scripts, the consonant <samp>"Ra"</samp> does not receive special
treatment; <samp>"Ra,Halant"</samp> sequences are not replaced with <samp>"Reph"</samp>.

> Note: Generally speaking, OpenType fonts will implement support for
> any below-base, post-base, and pre-base-reordering consonant forms
> by including the necessary substitution rules in their `blwf`,
> `pstf`, and `pref` lookups in <abbr title="Glyph Substitution table">GSUB</abbr>.
>
> Consequently, whenever shaping engines need to determine whether or 
> not a given consonant can take on such a special form, the most
> appropriate test is to check if the consonant is included in the
> relevant <abbr title="Glyph Substitution table">GSUB</abbr> lookup. Other implementations are possible, such as
> maintaining static tables of consonants, but checking for <abbr title="Glyph Substitution table">GSUB</abbr>
> support ensures that the expected behavior is implemented in the
> active font, and is therefore the most reliable approach.


In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.

> Note: Foreign loanwords, when written in the Tamil script, may
> not adhere to the syllable-formation rules described above. In
> particular, it is not uncommon to encounter foreign loanwords that
> contain a word-final suffix of consonants.
>
> Nevertheless, such word-final suffixes will be correctly matched by
> the regular expressions listed below. These loanwords are pronounced
> differently, which raises issues for potential readers, but the
> character sequences do not affect the shaping process.


Syllables should be identified by examining the run and matching
glyphs, based on their categorization, using regular expressions. 

The following general-purpose Indic-shaping regular expressions can be
used to match Tamil syllables.

The regular expressions utilize the shaping classes from the tables
above. For the purpose of syllable identification, more general
classes can be used, as defined in the following table. This
simplifies the resulting expressions. 

```markdown
_ra_		= The consonant "Ra" 
_consonant_	= ( `CONSONANT` | `CONSONANT_DEAD` ) - _ra_
_vowel_		= `VOWEL_INDEPENDENT`
_nukta_	  	= `NUKTA`
_halant_	= `VIRAMA`
_zwj_		= `JOINER`
_zwnj_		= `NON_JOINER`
_matra_		= `VOWEL_DEPENDENT` | `PURE_KILLER`
_syllablemodifier_	= `SYLLABLE_MODIFIER` | `BINDU` | `VISARGA` | `GEMINATION_MARK`
_vedicsign_	= `CANTILLATION`
_placeholder_	= `PLACEHOLDER` | `CONSONANT_PLACEHOLDER` | `NUMBER`
_dottedcircle_	= `DOTTED_CIRCLE`
_repha_		= `CONSONANT_PRE_REPHA`
_consonantmedial_	= `CONSONANT_MEDIAL`
_symbol_	= `SYMBOL` | `AVAGRAHA`
_consonantwithstacker_	= `CONSONANT_WITH_STACKER`
_other_		= `OTHER` | `MODIFYING_LETTER`
```


> Note: the _ra_ identification class is mutually exclusive with 
> the _consonant_ class. The union of the _consonant_ and _ra_ classes
> is used in the regular expression elements below in order to
> correctly identify <samp>"Ra"</samp> characters that do not trigger <samp>"Reph"</samp> or
> <samp>"Rakaar"</samp> shaping behavior.
>
> Note, also, that the cantillation mark "combining Ra" in the
> Devanagari Extended block does _not_ belong to the _ra_
> identification class, and that the other "combining consonant"
> cantillation marks in the Devanagari Extended block do not belong to
> the _consonant_ identification class.

> Note: The _placeholder_ identification class includes codepoints
> that are often used in place of vowels or consonants when a document
> needs to display a matra, mark, or special form in isolation or
> in another context beyond a standard syllable. Examples of
> _placeholder_ codepoints include hyphens and non-breaking
> spaces. Sequences that utilize this approach should be identified as
> "standalone" syllables.
>
> The _placeholder_ identification class also includes numerals, which
> are commonly used as word substitutes within normal text. Examples
> include ordinals (e.g., "4th").

> Note: The _other_ identification class includes codepoints that
> do not interact with adjacent characters for shaping purposes. Even
> though some of these codepoints (such as `MODIFYING_LETTER`) can
> occur within words, they evoke no behavior from the shaping
> engine and do not factor into the regular expressions that
> follow. Therefore, the shaping engine may choose to ignore them
> during syllable identification; they are listed here for completeness.

These identification classes form the bases of the following regular
expression elements:

```markdown
C	= _consonant_ | _ra_
Z	= _zwj_ | _zwnj_
REPH	= (_ra_ _halant_) | _repha_
CN		= C _zwj_? _nukta_?
FORCED_RAKAR	= _zwj_ _halant_ _zwj_ _ra_
S	= _symbol_ _nukta_?
MATRA_GROUP	= Z{0,3} _matra_ _nukta_? (_halant_ | FORCED_RAKAR)?
SYLLABLE_TAIL	= (Z? _syllablemodifier_ _syllablemodifier_? _zwnj_?)? _vedicsign_{0,3}
HALANT_GROUP	= Z? _halant_ (_zwj_ _nukta_?)?
FINAL_HALANT_GROUP	= HALANT_GROUP | (_halant_ _zwnj_)
MEDIAL_GROUP	= _consonantmedial_?
HALANT_OR_MATRA_GROUP	= (FINAL_HALANT_GROUP | MATRA_GROUP*)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(MATRA_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(MATRA_GROUP){0,4}` .


Using the above elements, the following regular expressions define the
possible syllable types:

A consonant-based syllable will match the expression:
```markdown
(_repha_|_consonantwithstacker_)? (CN HALANT_GROUP)* CN MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(CN HALANT_GROUP)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(CN HALANT_GROUP){0,4}` .

A vowel-based syllable will match the expression:
```markdown
REPH? _vowel_ _nukta_? (_zwj_ | (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

A standalone syllable will match the expression:
```markdown
((_repha_|_consonantwithstacker_)? _placeholder_ | REPH? _dottedcircle_) _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

> Note: Although they are labeled as "standalone syllables" here,
> many sequences that match the standalone regular expression above
> are instances where a document needs to display a matra, combining
> mark, or special form in isolation. Such sequences might not have
> any significance with regard to the definition of syllables used in
> the language or orthography of the text.

A symbol-based syllable will match the expression:
```markdown
S SYLLABLE_TAIL
```

A broken syllable will match the expression:
```markdown
REPH? _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-world syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .


The primary problem involved in shaping broken syllables is the lack
of a syllable base (either a base consonant or an independent
vowel). Without a syllable base, the shaping engine cannot perform
<abbr title="Glyph Positioning table">GPOS</abbr> positioning and other contextual operations that are required
later in the shaping process.

To make up for this limitation, shaping engines should insert a
dotted-circle placeholder (`U+25CC`) character into the text stream
where the missing syllable base was expected to occur. This
placeholder allows the shaping process to proceed on a best-effort
basis at handling the broken-syllable sequence, but making guarantees
about the orthographic correctness or preferred appearance of the
final result is out of scope for this document.

Shaping engines can perform this dotted-circle insertion at any point
after the broken syllable has been recognized and before <abbr title="Glyph Substitution table">GSUB</abbr> features
are applied. However, the best results will likely be attained by
performing the insertion immediately, before proceeding to
stage 2. This will enable the maximum number of <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features
in the active font to be correctly applied to the text run by ensuring
that all reordering, tagging, and sorting algorithms are executed as
usual.

> Note: In software stacks where other text-handling operations, such
> as Unicode normalization and localization, are performed before the
> text run is passed to the shaping engine, there is a potential for
> the dotted-circle insertion to cause unexpected effects.
>
> For example, if a `ccmp` or `locl` feature substitutes the default
> dotted-circle placeholder glyph with a variant glyph of a different
> size or weight for the (`U+25CC`) codepoint, then any shaping engine
> which relies on another software component to handle that
> functionality must take additional care to ensure consistency.


The expressions above use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


After the syllables have been identified, each of the subsequent 
shaping stages occurs on a per-syllable basis.

### Stage 2: Initial reordering ###

The initial reordering stage is used to relocate glyphs from the
phonetic order in which they occur in a run of text to the
orthographic order in which they are presented visually.

> Note: Primarily, this means moving dependent-vowel (matra) glyphs.
>
> These reordering moves are mandatory. The final-reordering stage
> may make additional moves, depending on the text and on the features
> implemented in the active font.

The syllable should be processed by tagging each glyph with its
intended position based on its ordering category. After all glyphs
have been tagged, the entire syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.

The final sort order of the ordering categories should be:


	POS_RA_TO_BECOME_REPH
	POS_PREBASE_MATRA
	POS_PREBASE_CONSONANT

	POS_SYLLABLE_BASE
	POS_AFTER_MAIN

	POS_ABOVEBASE_CONSONANT

	POS_BEFORE_SUBJOINED
	POS_BELOWBASE_CONSONANT
	POS_AFTER_SUBJOINED

	POS_BEFORE_POST
	POS_POSTBASE_CONSONANT
	POS_AFTER_POST

	POS_FINAL_CONSONANT
	POS_SMVD


This sort order enumerates all of the possible final positions to
which a codepoint might be reordered, across all of the Indic
scripts. It includes some ordering categories not utilized in
Tamil. 

The basic positions (left to right) are <samp>"Reph"</samp>
(`POS_RA_TO_BECOME_REPH`), dependent vowels (matras) and consonants
positioned before the base consonant or syllable base
(`POS_PREBASE_MATRA` and `POS_PREBASE_CONSONANT`), the base consonant
or syllable base (`POS_SYLLABLE_BASE`), above-base consonants
(`POS_ABOVEBASE_CONSONANT`), below-base consonants
(`POS_BELOWBASE_CONSONANT`), consonants positioned after the base
consonant or syllable base (`POS_POSTBASE_CONSONANT`), syllable-final
consonants (`POS_FINAL_CONSONANT`), and syllable-modifying or Vedic
signs (`POS_SMVD`).

In addition, several secondary positions are defined to handle various
reordering rules that deal with relative, rather than absolute,
positioning. `POS_AFTER_MAIN` means that a character must be
positioned immediately after the syllable base. `POS_BEFORE_SUBJOINED`
and `POS_AFTER_SUBJOINED` mean that a character must be positioned
before or after any below-base consonants, respectively. Similarly,
`POS_BEFORE_POST` and `POS_AFTER_POST` mean that a character must be
positioned before or after any post-base consonants, respectively. 

For shaping-engine implementers, the names used for the ordering
categories matter only in that they are unambiguous. 

For a definition of the "base" consonant, refer to stage 2, step 1, which
follows.

#### Stage 2, step 1: Base consonant ####

The first step is to determine the base consonant of the syllable, if
there is one, and tag it as `POS_SYLLABLE_BASE`.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base, and it should be tagged
as `POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a standalone sequence or other syllable that begins with a placeholder
or dotted circle, the placeholder or dotted circle will always serve
as the syllable base, and it should be tagged as
`POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a syllable that begins with a consonant, the shaping engine must
determine the base consonant by a script-specific algorithm.

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

The base consonant is defined as the consonant in a consonant-based
syllable that carries the syllable's vowel sound. That vowel sound
will either be provided by the script's inherent vowel (in which case
it is not written with a separate character) or the sound will be designated
by the addition of a dependent-vowel (matra) sign.


<!--- > Because vowel-based syllables will not include consonants and
> because independent vowels do not take on special forms or require
> reordering, many of the steps that follow will involve no
> work for a vowel-based syllable. However, vowel-based syllables must
> still be sorted and their marks handled correctly, and <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr>
> lookups must be applied. These steps of the shaping process follow
> the same rules that are employed for consonant-based syllables.
--->

While performing the base-consonant search, shaping engines may
also encounter special-form consonants, including below-base
consonants and post-base consonants. Each of these special-form
consonants must also be tagged (`POS_BELOWBASE_CONSONANT`,
`POS_POSTBASE_CONSONANT`, respectively). 

Any pre-base-reordering consonant (such as a pre-base-reordering <samp>"Ra"</samp>)
encountered during the base-consonant search must be tagged
`POS_POSTBASE_CONSONANT`. 
 
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.


The algorithm for determining the base consonant is:

  - If the syllable starts with <samp>"Ra,Halant"</samp> and the syllable contains
    more than one consonant, exclude the starting <samp>"Ra"</samp> from the list of
    consonants to be considered. 
  - Starting from the end of the syllable, move backwards until a consonant is found.
      * If the consonant is the first consonant, stop.
      * If the consonant is preceded by the sequence <samp>"Halant,ZWJ"</samp>, stop.
      * If the consonant has a below-base form, tag it as
        `POS_BELOWBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant has a post-base form, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant is a pre-base-reordering <samp>"Ra"</samp>, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If none of the above conditions is true, stop.
  - The consonant stopped at will be the base consonant.

> Note: The algorithm is designed to work for all Indic
> scripts. However, Tamil does not utilize pre-base-reordering <samp>"Ra"</samp>.

Tamil does not usually incorporate post-base or below-base
consonant forms. However, it is possible for a font to incorporate
them for typographic variation.

> Note: Because Tamil employs the `BLWF_MODE_PRE_AND_POST` shaping
> characteristic, consonants with below-base special forms may occur
> before or after the syllable base. 
> 
> During the base-consonant search, only the <samp>"Halant,_consonant_"</samp> 
> pattern following the syllable base for these below-base forms will
> be encountered. Stage 2, step 5 below ensures that the <samp>"_consonant_,Halant"</samp>
> pattern preceding the syllable base for these below-base forms will
> also be tagged correctly.


#### Stage 2, step 2: Matra decomposition ####

Second, any multi-part dependent vowels (matras) must be decomposed
into their components. Tamil has three multi-part dependent vowels,
"O" (`U+0BCA`), "Oo" (`U+0BCB`), and "Au" (`U+0BCC`). Each
has a canonical decomposition, so this step is unambiguous. 


> "O" (`U+0BCA`) decomposes to "`U+0BC6`,`U+0BBE`"
>
> "Oo" (`U+0BCB`) decomposes to "`U+0BC7`,`U+0BBE`"
> 
> "Au" (`U+0BCC`) decomposes to "`U+0BC6`,`U+0BD7`"


Because this decomposition is a character-level operation, the shaping
engine may choose to perform it earlier, such as during an initial
Unicode-normalization stage. However, all such decompositions must be
completed before the shaping engine begins step three, below.

:::{figure-md}
![Two-part matra decomposition](/images/tamil/tamil-matra-decompose.svg "Two-part matra decomposition"){.shaping-demo .inline-svg .greyscale-svg #tamil-matra-decompose}

Two-part matra decomposition
:::

```{svg-color-toggle-button} tamil-matra-decompose
```


#### Stage 2, step 3: Tag matras ####

Third, all left-side dependent-vowel (matra) signs must be tagged to be
moved to the beginning of the syllable, with `POS_PREBASE_MATRA`.

Above-base dependent-vowel (matra) signs must be tagged with `POS_AFTER_SUBJOINED`.

Right-side dependent-vowel (matra) signs must be tagged with `POS_AFTER_POST`.

Below-base dependent-vowel (matra) signs must be tagged with `POS_AFTER_POST`.

#### Stage 2, step 4: Adjacent marks ####

Fourth, any subsequences of marks that include a <samp>"Nukta"</samp> and a
<samp>"Halant"</samp> or Vedic sign must be reordered so that the <samp>"Nukta"</samp> appears
first.

This means that the subsequence <samp>"Halant,Nukta"</samp> is reordered to
<samp>"Nukta,Halant"</samp> and that the subsequence <samp>"_Vedic_sign_,Nukta"</samp> is
reordered to <samp>"Nukta,_Vedic_sign_"</samp>.

For subsequences of affected marks that are longer than two, the
reordering operation must be repeated until the <samp>"Nukta"</samp> is the first
character in the subsequence. No other marks in the subsequence
should be reordered.

This order is canonical in Unicode and is required so that
<samp>"_consonant_,Nukta"</samp> substitution rules from <abbr title="Glyph Substitution table">GSUB</abbr> will be correctly
matched later in the shaping process.

> Note: The Tamil Unicode block does not include a "Nukta"
> codepoint. However, Tamil text may include "Grantha Nukta" (`U+1133C`)
> and other modifier signs from the Grantha Unicode block.
>
> In addition, `<tml2>` text runs in minority languages that
> use the Tamil script may incorporate nukta characters from other
> blocks. Therefore shaping engines must apply the appropriate
> mark-reordering move if a character matching the NUKTA shaping class
> is encountered.


#### Stage 2, step 5: Pre-base consonants ####

Fifth, consonants that occur before the syllable base must be tagged
with `POS_PREBASE_CONSONANT`. Excluding initial <samp>"Ra,Halant"</samp> sequences
that will become <samp>"Reph"</samp>s: 

  - If the consonant has a below-base form, tag it as
          `POS_BELOWBASE_CONSONANT`. 
  - Otherwise, tag it as `POS_PREBASE_CONSONANT`.
  
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.

Tamil does not usually incorporate post-base or below-base
consonant forms. However, it is possible for a font to incorporate
them for typographic variation.

> Note: Because Tamil employs the `BLWF_MODE_PRE_AND_POST` shaping
> characteristic, consonants with below-base special forms may occur
> before or after the syllable base. 
> 
> During the base-consonant search in stage 2, step 1, any instances of the
> <samp>"Halant,_consonant_"</samp>  pattern following the syllable base for these
> below-base forms will be encountered. The tagging in this step
> ensures that the <samp>"_consonant_,Halant"</samp> pattern preceding the syllable
> base for these below-base forms will also be tagged correctly.


#### Stage 2, step 6: Reph ####

Sixth, initial <samp>"Ra,Halant"</samp> sequences that will become <samp>"Reph"</samp>s must be tagged with
`POS_RA_TO_BECOME_REPH`.

Tamil does not use <samp>"Reph"</samp>, so this step will
involve no work when shaping `<tml2>` text. It is included here in order
to maintain compatibility with the other Indic scripts.

#### Stage 2, step 7: Final consonants ####

Seventh, all final consonants must be tagged. Consonants that occur
after the syllable base _and_ after a dependent vowel (matra) sign
must be tagged with  `POS_FINAL_CONSONANT`.

> Note: Final consonants occur only in Sinhala and should not be
> expected in `<tml2>` text runs. This step is included here to
> maintain compatibility across Indic scripts.

#### Stage 2, step 8: Mark tagging ####

Eighth, all marks must be tagged. 

> Note: In this step, joiner and non-joiner characters must also be
> tagged according to the same rules given for marks, even though
> these characters are not categorized as marks in Unicode.

Marks in the `BINDU`, `VISARGA`, `AVAGRAHA`, `CANTILLATION`,
`SYLLABLE_MODIFIER`, `GEMINATION_MARK`, and `SYMBOL` categories should
be tagged with `POS_SMVD`. 

All <samp>"Nukta"</samp>s must be tagged with the same positioning tag as the
preceding consonant, independent vowel, placeholder, or dotted circle.

All remaining marks (not in the `POS_SMVD` category and not <samp>"Nukta"</samp>s)
must be tagged with the same positioning tag as the closest non-mark
character the mark has affinity with, so that they move together 
during the sorting step.

There are two possible cases: those marks before the syllable base
and those marks after the syllable base. In addition, an exception is
made for <samp>"Halant"</samp> marks that follow a left-side (pre-base) matra.

  1. Initially, all remaining marks should be tagged with the same
	 positioning tag as the closest preceding consonant.

  2. For each consonant after the syllable base (such as post-base
	 consonants, below-base consonants, or final consonants), all
	 remaining marks located between that current consonant and any
	 previous consonant should be tagged with the same positioning tag as
	 the current (later) consonant.
  
     In other words, all consonants preceding the syllable base "own" the
	 marks that follow them, while all consonants after the syllable base
	 "own" the marks that come before them. When a syllable does not have
	 any consonants after the syllable base, the syllable base should
	 "own" all the marks that follow it.
  
  3. Finally, <samp>"Halant"</samp> marks that follow a left-side dependent vowel
     (matra) should _not_ be tagged with the left-side matra's
     positioning tag. Instead, the <samp>"Halant"</samp> should be tagged with the
     positioning tag of the non-mark character preceding the left-side
     matra. This prevents the <samp>"Halant"</samp> mark from being moved with the
     left-side matra when the syllable is sorted.


<!--- HarfBuzz also tags everything between a post-base consonant or -->
<!--matra and another post-base consonant as belonging to the latter -->
<!--post-base consonant. --->


#### Stage 2, step 9: Sort syllable ####

With these steps completed, the syllable can be sorted into the final
sort order as listed at the beginning of stage 2.

The glyphs in the syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.


#### Stage 2, step 10: Flag sequences for possible feature applications ####

With the initial reordering complete, those glyphs in the syllable that
may have <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features applied in stages 3, 5, and 6 should be
flagged for each potential feature. 

This flagging is preliminary; the set of potential features varies
between different scripts and which features are supported varies
between fonts. It is also possible that the application of
one feature on a glyph sequence will perform a substitution that makes
a later feature no longer applicable to the updated sequence.

Consequently, the flagging must be completed before shaping proceeds
to the stages during which features are applied.

Some shaping features, such as `locl`, can potentially apply to any
glyphs. Therefore it is not necessary to maintain a separate flag for
these features in the bitmask (or other data structure) used to track
the flags -- although shaping engines may do so if desired.

The sequences to flag are summarized in the list below; a full
description of each feature's function and interpretation is provided
in the <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> application stages that follow.

  - `nukt` should match <samp>"_Consonant_,Nukta"</samp> sequences
  - `akhn` should match <samp>"Ka,Halant,Ssa"</samp>
  - `pref` should match <samp>"_Consonant_,Halant"</samp> sequences in
            pre-base position but _not_ match <samp>"Ra,Halant"</samp> sequences
            flagged for `rphf`
  - `blwf` should match <samp>"Halant,_Consonant_"</samp> in
            post-base positions and <samp>"_Consonant_,Halant"</samp> in
            non-initial pre-base positions 
  - `abvf` should match initial <samp>"_Consonant_,Halant"</samp> sequences
  - `half` should match <samp>"_Consonant_,Halant"</samp> in pre-base position but
           _not_ match <samp>"Ra,Halant"</samp> sequences flagged for `rphf` and
           _not_ match <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequences
  - `pstf` should match <samp>"Halant,_Consonant_"</samp> in post-base position
  - `cjct` should match <samp>"_Consonant_,Halant,_Consonant_"</samp> but _not_
            match <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
            <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp>


### Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should be flagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2, step 10.

The order in which these substitutions must be performed is fixed for
all Indic scripts:

	locl
	nukt 
	akhn
	rphf (not used in Tamil) 
	rkrf (not used in Tamil)
	pref 
	blwf 
	abvf 
	half
	pstf 
	vatu (not used in Tamil)
	cjct 
	cfar (not used in Tamil)

#### Stage 3, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

#### Stage 3, step 2: nukt ####


The `nukt` feature replaces <samp>"_Consonant_,Nukta"</samp> sequences with a
precomposed nukta-variant of the consonant glyph. 

> Note: The Tamil Unicode block does not include a "Nukta"
> codepoint. However, Tamil text may include "Grantha Nukta" (`U+1133C`)
> from the Grantha Unicode block.
>
> In addition, `<tml2>` text runs in minority languages that
> use the Tamil script may incorporate nukta characters from other
> blocks. Therefore shaping engines must apply the `nukt` feature if
> it is used in the active font.

  - The context defined for a `nukt` feature is:

:::{table} `nukt` feature context
    
| Backtrack     | Matching sequence             | Lookahead     |
|:--------------|:------------------------------|:--------------|
| _none_        | `_consonant_`(full),`_nukta_` | _none_        |
:::


#### Stage 3, step 3: akhn ####

The `akhn` feature replaces one specific sequence with a required ligature. 

  - <samp>"Ka,Halant,Ssa"</samp> is substituted with the <samp>"KSsa"</samp> ligature. 
  
This sequence can occur anywhere in a syllable. The <samp>"KSsa"</samp> 
character has orthographic status equivalent to full
consonants in some languages, and fonts may have `cjct` substitution
rules designed to match it in subsequences. Therefore, this
feature must be applied before all other many-to-one substitutions.

  - The context defined for an `akhn` feature is:

:::{table} `akhn` feature context
    
| Backtrack     | Matching sequence           | Lookahead     |
|:--------------|:----------------------------|:--------------|
| _none_        | `AKHAND_CONSONANT_SEQUENCE` | _none_        |
:::


:::{figure-md}
![Akhand KSsa formation](/images/tamil/tamil-akhn-kssa.svg "Akhand KSsa formation"){.shaping-demo .inline-svg .greyscale-svg #tamil-akhn-kssa}

Akhand KSsa formation
:::

```{svg-color-toggle-button} tamil-akhn-kssa
```


#### Stage 3, step 4: rphf ####

> This feature is not used in Tamil.

  - The context defined for a `rphf` feature is:

:::{table} `rphf` feature context
    
| Backtrack        | Matching sequence       | Lookahead     |
|:-----------------|:------------------------|:--------------|
| `SYLLABLE_START` | "Ra"(full),`_halant_`   | _none_        |
:::

	
#### Stage 3, step 5: rkrf ####

> This feature is not used in Tamil.


#### Stage 3, step 6: pref ####

The `pref` feature replaces pre-base-reordering consonant glyphs with
any special forms.

The substitution of the nominal glyph for its special form takes place
at this stage. However, the actual reordering move is performed later,
in stage 4, step 4.

> Note: Tamil does not usually incorporate pre-base-consonant forms, but it is
> possible for a font to implement them in order to provide for
> desired typographic variation.


#### Stage 3, step 7: blwf ####

The `blwf` feature replaces below-base-consonant glyphs with any
special forms. 

> Note: Tamil does not usually incorporate below-base-consonant forms, but it is
> possible for a font to implement them in order to provide for
> desired typographic variation.


Because Tamil incorporates the `BLWF_MODE_PRE_AND_POST` shaping
characteristic, any pre-base consonants and any post-base consonants
may potentially match a `blwf` substitution; therefore, both cases must
be flagged for comparison. Note that this is not necessarily the case in other
Indic scripts that use a different `BLWF_MODE_` shaping
characteristic. 


#### Stage 3, step 8: abvf ####

The `abvf` feature replaces above-base-consonant glyphs with any
special forms. 

> Note: Tamil does not usually incorporate above-base-consonant forms, but it is
> possible for a font to implement them in order to provide for
> desired typographic variation.


#### Stage 3, step 9: half ####

The `half` feature replaces <samp>"_Consonant_,Halant"</samp> sequences before the
base consonant or syllable base with "half forms" of the consonant
glyphs.

In the most common case, this substitution applies to
<samp>"_Consonant_,Halant"</samp> sequences that are followed by another
<samp>"_Consonant_"</samp>.

In addition, a sequence matching <samp>"_Consonant_,Halant,ZWJ"</samp> must also be
flagged for potential `half` substitutions.

> Note: The presence of the <samp>"ZWJ"</samp> at the end of the sequence means
> that the sequence may match the regular-expression test in stage 1
> as the end of a syllable, even without being followed by a base
> consonant or syllable base.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.

There are two exceptions to the default behavior, for which the
shaping engine must test:

  - Initial <samp>"Ra,Halant"</samp> sequences, which should have been flagged for
    the `rphf` feature earlier, must not be flagged for potential
    `half` substitutions.

  - A sequence matching <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be
    flagged for potential `half` substitutions.


> Note: Tamil does not usually incorporate half forms, but it is
> possible for a font to implement them in order to provide for
> desired typographic variation. For example, a font may substitute a
> ligature of the <samp>"_Consonant_"</samp> and <samp>"Halant"</samp> glyphs.

:::{figure-md}
![half-form feature application](/images/tamil/tamil-half.svg "half-form feature application"){.shaping-demo .inline-svg .greyscale-svg #tamil-half}

half-form feature application
:::

```{svg-color-toggle-button} tamil-half
```


#### Stage 3, step 10: pstf ####

The `pstf` feature replaces post-base-consonant glyphs with any
special forms. 

> Note: Tamil does not usually incorporate post-base-consonant forms, but it is
> possible for a font to implement them in order to provide for
> desired typographic variation.


#### Stage 3, step 11: vatu ####

> This feature is not used in Tamil.

#### Stage 3, step 12: cjct ####

The `cjct` feature replaces sequences of adjacent consonants with
conjunct ligatures. These sequences must match <samp>"_Consonant_,Halant,_Consonant_"</samp>.

A sequence matching <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
<samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be flagged to form a conjunct.

> Note: The presence of the <samp>"ZWJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> sequence should automatically
> inhibit any `cjct` feature rules from matching the sequence as valid
> input, and thus prevent the `cjct` substitution from being applied.

> Note: The presence of the <samp>"ZWNJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequence means that the
> <samp>"_Consonant_,Halant,ZWNJ"</samp> subsequence will match the
> regular-expression test in stage 1 as the end of a syllable.
> 
> Because OpenType shaping features in `<tml2>` are defined as
> applying only within an individual syllable, this means that the
> presence of the <samp>"ZWNJ"</samp> will automatically prevent the application of
> a `cjct` feature by triggering the identification of a syllable
> break between the two consonants.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWNJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.
>
> Note, also: The presence of the <samp>"ZWJ"</samp> means that a
> <samp>"_Consonant_,Halant,ZWJ"</samp> sequence may match the regular-expression
> test in stage 1 as the end of a syllable, even without being
> followed by a base consonant or syllable base. By definition,
> however, a <samp>"_Consonant_,Halant,ZWJ"</samp> syllable identified in stage 1
> cannot also include a <samp>"_Consonant_"</samp> after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.

The font's <abbr title="Glyph Substitution table">GSUB</abbr> rules might be implemented so that `cjct`
substitutions apply to half-form consonants; therefore, this feature
must be applied after the `half` feature. 


> Note: Tamil does not usually incorporate conjunct forms, but it is
> possible for a font to implement them in order to provide for
> desired typographic variation.


:::{figure-md}
![Conjunct formation](/images/tamil/tamil-cjct.svg "Conjunct formation"){.shaping-demo .inline-svg .greyscale-svg #tamil-cjct}

Conjunct formation
:::

```{svg-color-toggle-button} tamil-cjct
```


#### Stage 3, step 13: cfar ####

> This feature is not used in Tamil.


### Stage 4: Final reordering ###

The final reordering stage repositions marks, dependent-vowel (matra)
signs, and <samp>"Reph"</samp> glyphs to the appropriate location with respect to
the base consonant or syllable base. Because multiple substitutions
may have occurred during the application of the basic-shaping features
in the preceding stage, these repositioning moves could not be
performed during the initial reordering stage.

Like the initial reordering stage, the steps involved in this stage
occur on a per-syllable basis.

<!--- Check that classifications have not been mangled. If the -->
<!--character is a Halant AND a ligature was formed AND a multiple
substitution was performed, restore the classification to VIRAMA
because it was almost certainly lost in the preceding <abbr title="Glyph Substitution table">GSUB</abbr> stage.
--->

#### Stage 4, step 1: Base consonant ####

The final reordering stage, like the initial reordering stage, begins
with determining the syllable base of each syllable, following the
same algorithm used in stage 2, step 1.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base. In a standalone sequence or
other syllable that begins with a placeholder or a dotted circle, the
placeholder or dotted circle will always serve as the syllable base.

In a syllable that begins with a consonant, the shaping engine must
repeat the base-consonant search algorithm used in stage 2, step 1.

The codepoint of the underlying base consonant or syllable base will
not change between the search performed in stage 2, step 1, and the
search repeated here. However, the application of <abbr title="Glyph Substitution table">GSUB</abbr> shaping
features in stage 3 means that several ligation and many-to-one
substitutions may have taken place. The final glyph produced by that
process may, therefore, be a conjunct or ligature form — in most
cases, such a glyph will not have an assigned Unicode codepoint.
   
#### Stage 4, step 2: Pre-base matras ####

Pre-base dependent vowels (matras) that were reordered during the
initial reordering stage must be moved to their final position. This
position is defined as:

   - after any ligature glyphs that resulted from the substitution of
     a <samp>"_Consonant_,Halant,ZWJ"</samp> subsequence
   - after the last standalone <samp>"Halant"</samp> glyph that comes after the
     matra's starting position and also comes before the main
     consonant.
   - If a zero-width joiner follows this last standalone <samp>"Halant"</samp>, the
     final matra position is moved to after the joiner.

This means that the matra will move to the right of all explicit
<samp>"_Consonant_,Halant"</samp> subsequences and all glyphs that resulted from a
substitution on a <samp>"_Consonant_,Halant,ZWJ"</samp> subsequence, but will stop
to the left of the base consonant or syllable base, and all conjuncts
or ligatures that contain the base consonant or syllable base.

:::{figure-md}
![Pre-base matra positioning](/images/tamil/tamil-matra-position.svg "Pre-base matra positioning"){.shaping-demo .inline-svg .greyscale-svg #tamil-matra-position}

Pre-base matra positioning
:::

```{svg-color-toggle-button} tamil-matra-position
```

> Note: OpenType and Unicode both state that if the syllable includes
> a <abbr title="Zero-Width Joiner">ZWJ</abbr> immediately after the last <samp>"Halant"</samp>, then the final matra
> position should be after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.
>
> However, there are several test sequences indicating that
> Microsoft's Uniscribe shaping engine did not follow this rule (in,
> at least, Devanagari and Bengali text), and in these circumstances
> Uniscribe instead makes the final matra position before the final
> <samp>"_Consonant_,Halant,ZWJ"</samp>.
>
> Subsequently, the HarfBuzz shaping engine has also followed the same
> pattern. If other shaping engine implementations prefer to maintain
> maximum compatibility with Uniscribe and HarfBuzz, then they should
> also follow suit.

> Note: The Microsoft script-development specifications for OpenType
> shaping also state that if a zero-width non-joiner follows the last
> standalone <samp>"Halant"</samp>, the final matra position is moved to after the
> non-joiner. However, it is unnecessary to test for this condition,
> because a <samp>"Halant,ZWNJ"</samp> subsequence is, by definition, the end of a
> syllable. Consequently, a <samp>"Halant,ZWNJ"</samp> cannot be followed by a
> pre-base dependent vowel.


#### Stage 4, step 3: Reph ####

<samp>"Reph"</samp> must be moved from the beginning of the syllable to its final
position. Because Tamil incorporates the `REPH_POS_AFTER_POST`
shaping characteristic, this final position is immediately after
any post-base consonant forms.


The algorithm for finding the final <samp>"Reph"</samp> position is:

  - Move the <samp>"Reph"</samp> to the position immediately before
    the first post-base matra, syllable modifier, or Vedic sign that
    has a positioning tag after the script's <samp>"Reph"</samp> position in the
    syllable sort order (as listed in [stage
    2](#stage-2-initial-reordering)). This will be the final <samp>"Reph"</samp>
    position. 
	> Note: Because Tamil incorporates the
    > `REPH_POS_AFTER_POST` shaping characteristic, this means
    > any positioning tag of `POS_FINAL_CONSONANT` or later,
    > although a post-base matra, syllable modifier, or Vedic sign
    > would not typically be tagged with `POS_FINAL_CONSONANT`.
  - If no other location has been located in the previous step, move
    the <samp>"Reph"</samp> to the end of the syllable.


Finally, if the final position of <samp>"Reph"</samp> occurs after a
<samp>"_matra_,Halant"</samp> subsequence, then <samp>"Reph"</samp> must be repositioned to the
left of <samp>"Halant"</samp>, to allow for potential matching with `abvs` or
`psts` substitutions from <abbr title="Glyph Substitution table">GSUB</abbr>.

Tamil does not use <samp>"Reph"</samp>, so this step will involve no work when
processing `<tml2>` text. It is included here in order to maintain
compatibility with the other Indic scripts. 


#### Stage 4, step 4: Pre-base-reordering consonants ####

Any pre-base-reordering consonants must be moved to immediately before
the base consonant or syllable base.
  
Tamil does not use pre-base-reordering consonants, so this step will
involve no work when processing `<tml2>` text. It is included here in order
to maintain compatibility with the other Indic scripts.
  
#### Stage 4, step 5: Initial matras ####

Any left-side dependent vowels (matras) that are at the start of a
word must be flagged for potential substitution by the `init` feature
of <abbr title="Glyph Substitution table">GSUB</abbr>.

Tamil does not use the `init` feature, so this step will
involve no work when processing `<tml2>` text. It is included here in
order to maintain compatibility with the other Indic scripts.


### Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr> ###

In this stage, the remaining substitution features from the <abbr title="Glyph Substitution table">GSUB</abbr> table
are applied. In preparation for this stage, glyph sequences should be
flagged for possible application of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2,
step 10.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	init (not used in Tamil)
	pres
	abvs
	blws
	psts
	haln

The `init` feature is not used in Tamil.

The `pres` feature replaces pre-base-consonant glyphs with special
presentation forms. This can include consonant conjuncts, half-form
consonants, and stylistic variants of left-side dependent vowels
(matras). 

:::{figure-md}
![pres feature application](/images/tamil/tamil-pres.svg "pres feature application"){.shaping-demo .inline-svg .greyscale-svg #tamil-pres}

pres feature application
:::

```{svg-color-toggle-button} tamil-pres
```

The `abvs` feature replaces above-base-consonant glyphs with special
presentation forms. This usually includes contextual variants of
above-base marks or contextually appropriate mark-and-base ligatures.

:::{figure-md}
![abvs feature application](/images/tamil/tamil-abvs.svg "abvs feature application"){.shaping-demo .inline-svg .greyscale-svg #tamil-abvs}

abvs feature application
:::

```{svg-color-toggle-button} tamil-abvs
```

The `blws` feature replaces below-base-consonant glyphs with special
presentation forms. This usually includes replacing base consonants or
syllable bases that
are adjacent to the below-base marks with contextually appropriate
ligatures.


The `psts` feature replaces post-base-consonant glyphs with special
presentation forms. This usually includes replacing right-side
dependent vowels (matras) with stylistic variants or replacing
post-base-consonant/matra pairs with contextual ligatures. 

:::{figure-md}
![psts feature application](/images/tamil/tamil-psts.svg "psts feature application"){.shaping-demo .inline-svg .greyscale-svg #tamil-psts}

psts feature application
:::

```{svg-color-toggle-button} tamil-psts
```

The `haln` feature replaces syllable-final <samp>"_Consonant_,Halant"</samp> pairs with
special presentation forms. This can include stylistic variants of the
consonant where placing the <samp>"Halant"</samp> mark on its own is
typographically problematic. 

:::{figure-md}
![haln feature application](/images/tamil/tamil-haln.svg "haln feature application"){.shaping-demo .inline-svg .greyscale-svg #tamil-haln}

haln feature application
:::

```{svg-color-toggle-button} tamil-haln
```

> Note: The `calt` feature, which allows for generalized application
> of contextual alternate substitutions, is usually applied at this
> point. However, `calt` is not mandatory for correct Tamil shaping
> and may be disabled in the application by user preference.

### Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied.

As with the preceding stage, the order in which these features are
applied is not canonical; they should be applied in the order in which
they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

        dist
        abvm
        blwm

> Note: The `kern` feature is usually applied at this stage, if it is
> present in the font. However, `kern` (like `calt`, above) is not
> mandatory for shaping Tamil text and may be disabled by user preference.

The `dist` feature adjusts the horizontal positioning of
glyphs. Unlike `kern`, adjustments made with `dist` do not require the
application or the user to enable any software _kerning_ features, if
such features are optional. 

:::{figure-md}
![Distance application](/images/tamil/tamil-dist.svg "Distance application"){.shaping-demo .inline-svg .greyscale-svg #tamil-dist}

Distance application
:::

```{svg-color-toggle-button} tamil-dist
```

The `abvm` feature positions above-base marks for attachment to base
characters. In Tamil, this includes above-base dependent vowels
(matras), diacritical marks, and Vedic signs.

:::{figure-md}
![abvm feature application](/images/tamil/tamil-abvm.svg "abvm feature application"){.shaping-demo .inline-svg .greyscale-svg #tamil-abvm}

abvm feature application
:::

```{svg-color-toggle-button} tamil-abvm
```


The `blwm` feature positions below-base marks for attachment to base
characters. In Tamil, this includes below-base diacritical marks.


## The `<taml>` shaping model ##

The older Tamil script tag, `<taml>`, has been deprecated. However,
shaping engines may still encounter fonts that were built to work with
`<taml>` and some users may still have documents that were written to
take advantage of `<taml>` shaping.

### Distinctions from `<tml2>` ###

The most significant distinction between the shaping models is that the
sequence of <samp>"Halant"</samp> and consonant glyphs (used to trigger shaping
features) was altered when migrating from `<taml>` to
`<tml2>`. 

Specifically, shaping engines were expected to reorder post-base
<samp>"Halant,_Consonant_"</samp> sequences to <samp>"_Consonant_,Halant"</samp>.

As a result, a font's <abbr title="Glyph Substitution table">GSUB</abbr> substitutions would be written to match
<samp>"_Consonant_,Halant"</samp> sequences in all pre-base and post-base positions.


The `<taml>` syllable

	Pre-baseC Halant BaseC Halant Post-baseC

would be reordered to

	Pre-baseC Halant BaseC Post-baseC Halant

before features are applied.

In `<tml2>` text, as described above in this document, there is no
such reordering. The correct sequence to match for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions is
<samp>"_Consonant_,Halant"</samp> for pre-base consonants, but <samp>"Halant,_Consonant_"</samp>
for post-base consonants.

The old Indic shaping model also did not recognize the
`BLWF_MODE_PRE_AND_POST` shaping characteristic. Instead, `<taml>`
was treated as if it followed the `BLWF_MODE_POST_ONLY`
characteristic. In other words, below-base form substitutions were
only applied to consonants after the base consonant or syllable base.


### Advice for handling fonts with `<taml>` features only ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences in order to apply <abbr title="Glyph Substitution table">GSUB</abbr> substitutions when it is known that
the font in use supports only the `<taml>` shaping model.

### Advice for handling text runs composed in `<taml>` format ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions or to reorder them to
<samp>"Halant,_Consonant_"</samp> when processing text runs that are tagged with
the `<taml>` script tag and it is known that the font in use supports
only the `<tml2>` shaping model.

Shaping engines may also choose to apply `blwf` substitutions to
below-base consonants occurring before the base consonant or syllable base when it is
known that the font in use supports an applicable substitution lookup.

Shaping engines may also choose to position left-side matras according
to the `<taml>` ordering scheme; however, doing so might interfere
with matching <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features.


================================================
FILE: opentype-shaping-telugu.md
================================================
```{include} /_global.md
```

# Telugu shaping in OpenType #

This document details the shaping procedure needed to display text
runs in the Telugu script.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes and subclasses](#shaping-classes-and-subclasses)
      - [Telugu character tables](#telugu-character-tables)
  - [The `<tel2>` shaping model](#the-tel2-shaping-model)
      - [Stage 1: Identifying syllables and other sequences](#stage-1-identifying-syllables-and-other-sequences)
      - [Stage 2: Initial reordering](#stage-2-initial-reordering)
      - [Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr>](#stage-3-applying-the-basic-substitution-features-from-gsub)
      - [Stage 4: Final reordering](#stage-4-final-reordering)
      - [Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr>](#stage-5-applying-all-remaining-substitution-features-from-gsub)
      - [Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr>](#stage-6-applying-remaining-positioning-features-from-gpos)
  - [The `<telu>` shaping model](#the-telu-shaping-model)
      - [Distinctions from `<tel2>`](#distinctions-from-tel2)
      - [Advice for handling fonts with `<telu>` features only](#advice-for-handling-fonts-with-telu-features-only)
      - [Advice for handling text runs composed in `<telu>` format](#advice-for-handling-text-runs-composed-in-telu-format)


## General information ##

The Telugu script belongs to the Indic family, and follows
the same general patterns as the other Indic scripts. More
specifically, it belongs to the South Indic subgroup, in which
sequences of adjacent consonants are often represented as below-base forms.

The Telugu script is used to write multiple languages, most commonly
Telugu and Gondi. In addition, Sanskrit may be written
in Telugu, so Telugu script runs may include glyphs from the Vedic
Extensions block of Unicode. 

There are two extant Telugu script tags defined in OpenType, `<telu>`
and `<tel2>`. The older script tag, `<telu>`, was deprecated in 2005.
Therefore, new fonts should be engineered to work with the `<tel2>`
shaping model. However, if a font is encountered that supports only
`<telu>`, the shaping engine should deal with it gracefully.

## Terminology ##

OpenType shaping uses a standard set of terms for Indic scripts.  The
terms used colloquially in any particular language may vary, however,
potentially causing confusion.

**Matra** is the standard term for a dependent vowel sign. 

The term "matra" is also used to refer to the headline in other Indic
scripts, and may be used to describe the distinctive up-tick stroke above most
Telugu letters by comparison. To avoid ambiguity, the term **headline** is
used in most Unicode and OpenType shaping documents.

**Halant** and **Virama** are both standard terms for the above-base "vowel-killer"
sign. Unicode documents use the term "virama" most frequently, while
OpenType documents use the term "halant" most frequently. In the Telugu
language, this sign is known as the _halantamu_.

**Chandrabindu** (or simply **Bindu**) is the standard term for the diacritical mark
indicating that the preceding vowel should be nasalized. In the Telugu
language, this mark is known as the _candrabindu_.

The term **base consonant** is also critical to Indic shaping. The
base consonant of a syllable is the consonant that carries the
syllable's vowel sound, either the inherent vowel (for an unmarked
base consonant) or a dependent vowel (with the addition of a matra).

A syllable's base consonant is generally rendered in its full form
(although it may form ligatures), while other consonants in the
syllable frequently take on secondary forms. Different <abbr title="Glyph Substitution table">GSUB</abbr>
substitutions may apply to a script's **pre-base** and **post-base**
consonants. Some of these substitutions create **above-base** or
**below-base** forms. The **Reph** form of the consonant "Ra" is an
example.

Syllables may also begin with an **independent vowel** instead of a
consonant. In these syllables, the independent vowel is rendered in
full-letter form, not as a matra, and the independent vowel serves as the
syllable base, similar to a base consonant.

Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.

## Glyph classification ##

Shaping Telugu text depends on the shaping engine correctly
classifying each glyph in the run. As with most other scripts, the
classifications must distinguish between consonants, vowels
(independent and dependent), numerals, punctuation, and various types
of diacritical mark. 

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not sufficient to fully capture the
expected shaping behavior (such as glyph reordering). Therefore,
Telugu glyphs must additionally be classified by how they are treated
when shaping a run of text.

### Shaping classes and subclasses ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Indic scripts. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Several of the diacritic and syllable-modifying marks behave according
to their own rules and, thus, have a special class. These include
`BINDU`, `VISARGA`, `AVAGRAHA`, and `VIRAMA`. Some
less-common marks behave according to rules that are similar to these
common marks, and are therefore classified with the corresponding
common mark. The Vedic Extensions also include a `CANTILLATION`
class for tone marks.

Letters generally fall into the classes `CONSONANT`,
`VOWEL_INDEPENDENT`, and `VOWEL_DEPENDENT`. These classes help the
shaping engine parse and identify key positions in a syllable. For
example, Unicode categorizes dependent vowels as `Mark [Mn]`, but the
shaping engine must be able to distinguish between dependent vowels
and diacritical marks (some of which are also categorized as `Mark [Mn]`).


Other characters, such as symbols and miscellaneous letters (for
example, letter-like symbols that only occur as standalone entities
and do not occur within syllables), need no special attention from the
shaping engine, so they are not assigned a shaping class.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType features that
might affect how the respective glyphs are drawn, such as `tnum`,
which specifies the usage of tabular-width numerals, and `sups`, which
replaces the default glyphs with superscript variants.

Marks and dependent vowels are further labeled with a mark-placement
subclass, which indicates where the glyph will be placed with respect
to the base character to which it is attached. The actual position of
the glyphs is determined by the lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr>
table; however, the shaping rules for Indic scripts require that the
shaping engine be able to identify marks by their general
position. 

For example, left-side dependent vowels (matras), classified
with `LEFT_POSITION`, must frequently be reordered, with the final
position determined by whether or not other letters in the syllable
have formed ligatures or combined into conjunct forms. Therefore, the
`LEFT_POSITION` subclass of the character must be tracked throughout
the shaping process.

There are four basic _mark-placement subclasses_ for dependent vowels
(matras). Each corresponds to the visual position of the matra with
respect to the syllable base to which it is attached:

  - `LEFT_POSITION` matras are positioned to the left of the syllable base.
  - `RIGHT_POSITION` matras are positioned to the right of the syllable base.
  - `TOP_POSITION` matras are positioned above the syllable base.
  - `BOTTOM_POSITION` matras are positioned below the syllable base.
  
These positions may also be referred to elsewhere in shaping documents as:

  - _Pre-base_ matras
  - _Post-base_ matras
  - _Above-base_ matras
  - _Below-base_ matras
  
respectively. The `LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

In addition, dependent-vowel codepoints that are composed of multiple
components will be designated in character tables as having a compound
_mark-placement subclass_, such as `TOP_AND_RIGHT` or `LEFT_AND_RIGHT`. 

However, these multi-part matras are decomposed into separate matra
components during the shaping process. After the decomposition, each
matra component will belong to exactly one of the four basic
_mark-placement subclasses_.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there are some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 

### Telugu character tables ###

Separate character tables are provided for the Telugu and Vedic
Extensions blocks as well as for other miscellaneous characters that
are used in `<tel2>` text runs:

  - [Telugu character table](character-tables/character-tables-telugu.md#telugu-character-table)
  - [Vedic Extensions character table](character-tables/character-tables-telugu.md#vedic-extensions-character-table)
  - [Miscellaneous character table](character-tables/character-tables-telugu.md#miscellaneous-character-table)

The tables list each codepoint along with its Unicode general
category, its shaping class, and its mark-placement subclass. The
codepoint's Unicode name and an example glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0C01`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0C01; Candrabindu         |
| | | | |
|`U+0C15`   | Letter           | CONSONANT         | _null_                     | &#x0C15; Ka                  |
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine.

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the tables use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.


#### Special-function codepoints ####

Other important characters that may be encountered when shaping runs
of Telugu text include the dotted-circle placeholder (`U+25CC`), the
zero-width joiner (`U+200D`) and zero-width non-joiner (`U+200C`), and
the no-break space (`U+00A0`).

Each of these is of particular importance to shaping engines, because
these codepoints interact with the shaping engine, the text run, and
the active font, either to mediate non-default shaping behavior or to
relay information about the current shaping process.

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

Dotted-circle placeholder characters (like any Unicode codepoint) can
appear anywhere in text input sequences and should be rendered
normally. <abbr title="Glyph Positioning table">GPOS</abbr> positioning lookups should attach mark glyphs to dotted
circles as they would to other non-mark characters. As visible glyphs,
dotted circles can also be involved in <abbr title="Glyph Substitution table">GSUB</abbr> substitutions.

In addition to the default input-text handling process, shaping
engines may also insert dotted-circle placeholders into the text
sequence. Dotted-circle insertions are required when a non-spacing
mark or dependent sign is formed with no base character present.

This requirement covers:

  - Dependent signs that are assigned their own individual Unicode
    codepoints (such as most dependent-vowel marks or matras)
  
  - Dependent signs that are formed only by specific sequences of
    other codepoints (such as <samp>"Reph"</samp>)


The zero-width joiner (<abbr>ZWJ</abbr>) is primarily used to prevent the formation
of a conjunct from a <samp>"_Consonant_,Halant,_Consonant_"</samp> sequence.

  - The sequence <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> blocks the
    formation of a conjunct between the two consonants. 

Note, however, that the <samp>"_Consonant_,Halant"</samp> subsequence in the above
example may still trigger a half-forms feature. To prevent the
application of the half-forms feature in addition to preventing the
conjunct, the zero-width non-joiner (<abbr>ZWNJ</abbr>) must be used instead.

  - The sequence <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> should produce
    the first consonant in its standard form, followed by an explicit
    <samp>"Halant"</samp>. 

A secondary usage of the zero-width joiner is to prevent the formation of
<samp>"Reph"</samp> in some scripts, or to explicitly request a <samp>"Reph"</samp> form in
other scripts.

  - In Telugu, the default behavior for a syllable beginning with
    <samp>"Ra,Halant"</samp> is for the <samp>"Ra"</samp> to be displayed in full form. An
    explicit <samp>"Ra,Halant,ZWJ"</samp> sequence is required to produce a <samp>"Reph"</samp>
    instead of this default behavior.
	
  - In Telugu, a <samp>"Ra,ZWJ,Halant"</samp> sequence will prevent the formation
    of a <samp>"Reph"</samp> form.

The <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> characters are, by definition, non-printing control
characters and have the _Default_Ignorable_ property in the Unicode
Character Database. In standard text-display scenarios, their function
is to signal a request from the user to the shaping engine for some
particular non-default behavior. As such, they are not rendered
visually.

> Note: Naturally, there are special circumstances where a user or
> document might need to request that a <abbr title="Zero-Width Joiner">ZWJ</abbr> or <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> be rendered
> visually, such as when illustrating the OpenType shaping process, or
> displaying Unicode tables.

Because the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are non-printing control characters, they can
be ignored by any portion of a software text-handling stack not
involved in the shaping operations that the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> are designed
to interface with. For example, spell-checking or collation functions
will typically ignore <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr>.

Similarly, the <abbr title="Zero-Width Joiner">ZWJ</abbr> and <abbr title="Zero-Width Non Joiner">ZWNJ</abbr> should be ignored by the shaping engine
when matching sequences of codepoints against the backtrack and
lookahead sequences of a font's <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> lookups.

For example:

  - A lookup that substitutes an alternate version of a
    dependent-vowel (matra) glyph when it is preceded by <samp>"Ka,Halant,Tta"</samp>
    should still be applied if the dependent-vowel codepoint is preceded
    by <samp>"Ka,Halant,ZWJ,Tta"</samp> in the text run.

The no-break space (<abbr>NBSP</abbr>) is primarily used to display those
codepoints that are defined as non-spacing (marks, dependent vowels
(matras), below-base consonant forms, and post-base consonant forms)
in an isolated context, as an alternative to displaying them
superimposed on the dotted-circle placeholder. These sequences will
match <samp>"NBSP,ZWJ,Halant,_Consonant_"</samp>, <samp>"NBSP,_mark_"</samp>, or <samp>"NBSP,_matra_"</samp>.

In addition to general punctuation, runs of Telugu text often use the
danda (`U+0964`) and double danda (`U+0965`) punctuation marks from
the Devanagari block.


## The `<tel2>` shaping model ##

Processing a run of `<tel2>` text involves six top-level stages:

1. Identifying syllables and other sequences
2. Initial reordering
3. Applying the basic substitution features from <abbr>GSUB</abbr>
4. Final reordering
5. Applying all remaining substitution features from <abbr>GSUB</abbr>
6. Applying all remaining positioning features from <abbr>GPOS</abbr>


As with other Indic scripts, the initial reordering stage and the
final reordering stage each involve applying a set of several
script-specific rules. The basic substitution features must be applied
to the run in a specific order. The remaining substitution features in
stage five, however, do not have a mandatory order.

Indic scripts follow many of the same shaping patterns, but they
differ in a few critical characteristics that the shaping engine must
track. These include:

  - The position of the base consonant in a syllable.
  
  - The final position of <samp>"Reph"</samp>.
  
  - Whether <samp>"Reph"</samp> must be requested explicitly or if it is formed by
    a specific, implicit sequence.
	
  - Whether the below-base forms feature is applied only to consonants
    before the syllable base, only to consonants after the base
    consonant, or to both.
	
  - The ordering positions for dependent vowels
    (matras). Specifically, right-side, above-base, and below-base
    matras follow different rules in different scripts. 
	All Indic scripts position left-side matras in the same
    manner, in the ordering position `POS_PREBASE_MATRA`. 

With regard to these common variations, Telugu's specific shaping
characteristics include:

  - `BASE_POS_LAST` = The base consonant of a syllable is the last
     consonant, not counting any consonants with post-base forms.
	 
	 - Telugu differs somewhat from other `BASE_POS_LAST` scripts in
       that all consonants can use post-base forms. Therefore, the
       general base-consonant search algorithm should identify the first
       non-<samp>"Reph"</samp> consonant as the base. This is the expected
       behavior, as it allows the same search algorithm to be used
       with all `BASE_POS_LAST` scripts.

  - `REPH_POS_AFTER_POST` = <samp>"Reph"</samp> is ordered after the last post-base
     consonant form.

  - `REPH_MODE_EXPLICIT` = <samp>"Reph"</samp> is formed by an initial <samp>"Ra,Halant,ZWJ"</samp> sequence.

  - `BLWF_MODE_POST_ONLY` = The below-forms feature is applied only to
     post-base consonants.

  - `MATRA_POS_TOP` = `POS_BEFORE_SUBJOINED`  = Above-base matras are
    ordered before any subjoined (i.e., below-base) consonant forms.

  - `MATRA_POS_RIGHT` = Telugu includes right-side matras that follow two
     different reordering rules. 
	 
	 - Matras "Sign Vocalic R" (`0C43`) and "Sign Vocalic Rr" (`0C44`)
       use `POS_AFTER_SUBJOINED` = These right-side matras are ordered
       after all subjoined (i.e., below-base) consonant forms. 
	   
	 - Matras "Sign U" (`0C41`) and "Sign Uu" (`0C42`) use
       `POS_BEFORE_SUBJOINED` = These right-side matras are ordered before
       all subjoined (i.e., below-base) consonant forms.

  - `MATRA_POS_BOTTOM` = `POS_BEFORE_SUBJOINED` = Below-base matras are
     ordered before any subjoined (i.e., below-base) consonant forms.

These characteristics determine how the shaping engine must reorder
certain glyphs, how base consonants are determined, and how <samp>"Reph"</samp>
should be encoded within a run of text.


### Stage 1: Identifying syllables and other sequences ###

A syllable in Telugu consists of a valid orthographic sequence
that may be followed by a "tail" of modifier signs. 

> Note: The Telugu Unicode block enumerates five modifier signs,
> "Combining Candrabindu Above" (`U+0C00`), "Candrabindu" (`U+0C01`),
> "Anusvara" (`U+0C02`), "Visarga" (`U+0C03`), and "Avagraha"
> (`U+0C3D`). In addition, Sanskrit text written in Telugu may include
> additional signs from the Vedic Extensions block.

Each syllable contains exactly one vowel sound. Valid syllables may
begin with either a consonant or an independent vowel. 

If the syllable begins with a consonant, then the consonant that
provides the vowel sound is referred to as the "base" consonant. If
the syllable begins with an independent vowel, that independent vowel
is the syllable's only vowel sound and serves as the "base". 

> Note: A consonant that is not accompanied by a dependent vowel (matra) sign
> carries the script's inherent vowel sound. This vowel sound is changed
> by a dependent vowel (matra) sign following the consonant.

From the shaping engine's perspective, the main distinction between a
syllable with a base consonant and a syllable with an
independent-vowel base is that a syllable with an independent-vowel
base is less likely to include additional consonants in special forms
and less likely to include dependent vowel signs
(matras). Therefore, in the common case, vowel-based syllables may
involve less reordering, substitution feature applications, and other
processing than consonant-based syllables.

In some languages and orthographies, vowel-based syllables are
not permitted to include additional consonants or matras, and certain
<abbr title="Glyph Substitution table">GSUB</abbr> substitution features do not occur. However, there are often
known exceptions, and real-world text makes no such guarantees. 

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.


Telugu uses the `BASE_POS_LAST` characteristic mentioned
earlier. However, because all consonants in the script can potentially
take on post-base consonant forms, the outcome of the shaping
characteristic may be counterintuitive.

Generally speaking, the base consonant is the first logical consonant of the
syllable, which is rendered in full form, and any subsequent
consonants are rendered in special post-base forms. 

Each of these post-base consonants will be preceded by the <samp>"Halant"</samp> mark, which
indicates that they carry no vowel. They affect pronunciation by
combining with the base consonant (e.g., "_str_", "_pl_") but they
do not add a vowel sound.

As with other Indic scripts, the consonant <samp>"Ra"</samp> receives special
treatment; in many circumstances it is replaced by a combining
mark-like form. 

  - A <samp>"Ra,Halant,ZWJ"</samp> sequence at the beginning of a syllable is replaced
    with a right-side mark called <samp>"Reph"</samp>. This rule is synonymous with the
    `REPH_MODE_EXPLICIT` characteristic mentioned earlier.
  - A post-base <samp>"Ra"</samp> is reordered to before the base consonant or
    syllable base during the final-reordering stage of the shaping
    process.

<samp>"Reph"</samp> characters must be reordered after the syllable-identification
stage is complete.

> Note: Generally speaking, OpenType fonts will implement support for
> any below-base, post-base, and pre-base-reordering consonant forms
> by including the necessary substitution rules in their `blwf`,
> `pstf`, and `pref` lookups in <abbr title="Glyph Substitution table">GSUB</abbr>.
>
> Consequently, whenever shaping engines need to determine whether or 
> not a given consonant can take on such a special form, the most
> appropriate test is to check if the consonant is included in the
> relevant <abbr title="Glyph Substitution table">GSUB</abbr> lookup. Other implementations are possible, such as
> maintaining static tables of consonants, but checking for <abbr title="Glyph Substitution table">GSUB</abbr>
> support ensures that the expected behavior is implemented in the
> active font, and is therefore the most reliable approach.


In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.

> Note: Foreign loanwords, when written in the Telugu script, may
> not adhere to the syllable-formation rules described above. In
> particular, it is not uncommon to encounter foreign loanwords that
> contain a word-final suffix of consonants.
>
> Nevertheless, such word-final suffixes will be correctly matched by
> the regular expressions listed below. These loanwords are pronounced
> differently, which raises issues for potential readers, but the
> character sequences do not affect the shaping process.


Syllables should be identified by examining the run and matching
glyphs, based on their categorization, using regular expressions. 

The following general-purpose Indic-shaping regular expressions can be
used to match Telugu syllables.

The regular expressions utilize the shaping classes from the tables
above. For the purpose of syllable identification, more general
classes can be used, as defined in the following table. This
simplifies the resulting expressions. 

```markdown
_ra_		= The consonant "Ra" 
_consonant_	= ( `CONSONANT` | `CONSONANT_DEAD` ) - _ra_
_vowel_		= `VOWEL_INDEPENDENT`
_nukta_	  	= `NUKTA`
_halant_	= `VIRAMA`
_zwj_		= `JOINER`
_zwnj_		= `NON_JOINER`
_matra_		= `VOWEL_DEPENDENT` | `PURE_KILLER`
_syllablemodifier_	= `SYLLABLE_MODIFIER` | `BINDU` | `VISARGA` | `GEMINATION_MARK`
_vedicsign_	= `CANTILLATION`
_placeholder_	= `PLACEHOLDER` | `CONSONANT_PLACEHOLDER` | `NUMBER`
_dottedcircle_	= `DOTTED_CIRCLE`
_repha_		= `CONSONANT_PRE_REPHA`
_consonantmedial_	= `CONSONANT_MEDIAL`
_symbol_	= `SYMBOL` | `AVAGRAHA`
_consonantwithstacker_	= `CONSONANT_WITH_STACKER`
_other_		= `OTHER` | `MODIFYING_LETTER`
```


> Note: the _ra_ identification class is mutually exclusive with 
> the _consonant_ class. The union of the _consonant_ and _ra_ classes
> is used in the regular expression elements below in order to
> correctly identify <samp>"Ra"</samp> characters that do not trigger <samp>"Reph"</samp> or
> <samp>"Rakaar"</samp> shaping behavior.
>
> Note, also, that the cantillation mark "combining Ra" in the
> Devanagari Extended block does _not_ belong to the _ra_
> identification class, and that the other "combining consonant"
> cantillation marks in the Devanagari Extended block do not belong to
> the _consonant_ identification class.

> Note: The _placeholder_ identification class includes codepoints
> that are often used in place of vowels or consonants when a document
> needs to display a matra, mark, or special form in isolation or
> in another context beyond a standard syllable. Examples of
> _placeholder_ codepoints include hyphens and non-breaking
> spaces. Sequences that utilize this approach should be identified as
> "standalone" syllables.
>
> The _placeholder_ identification class also includes numerals, which
> are commonly used as word substitutes within normal text. Examples
> include ordinals (e.g., "4th").

> Note: The _other_ identification class includes codepoints that
> do not interact with adjacent characters for shaping purposes. Even
> though some of these codepoints (such as `MODIFYING_LETTER`) can
> occur within words, they evoke no behavior from the shaping
> engine and do not factor into the regular expressions that
> follow. Therefore, the shaping engine may choose to ignore them
> during syllable identification; they are listed here for completeness.

These identification classes form the bases of the following regular
expression elements:

```markdown
C	= _consonant_ | _ra_
Z	= _zwj_ | _zwnj_
REPH	= (_ra_ _halant_) | _repha_
CN		= C _zwj_? _nukta_?
FORCED_RAKAR	= _zwj_ _halant_ _zwj_ _ra_
S	= _symbol_ _nukta_?
MATRA_GROUP	= Z{0,3} _matra_ _nukta_? (_halant_ | FORCED_RAKAR)?
SYLLABLE_TAIL	= (Z? _syllablemodifier_ _syllablemodifier_? _zwnj_?)? _vedicsign_{0,3}
HALANT_GROUP	= Z? _halant_ (_zwj_ _nukta_?)?
FINAL_HALANT_GROUP	= HALANT_GROUP | (_halant_ _zwnj_)
MEDIAL_GROUP	= _consonantmedial_?
HALANT_OR_MATRA_GROUP	= (FINAL_HALANT_GROUP | MATRA_GROUP*)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(MATRA_GROUP)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(MATRA_GROUP){0,4}` .


Using the above elements, the following regular expressions define the
possible syllable types:

A consonant-based syllable will match the expression:
```markdown
(_repha_|_consonantwithstacker_)? (CN HALANT_GROUP)* CN MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(CN HALANT_GROUP)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(CN HALANT_GROUP){0,4}` .

A vowel-based syllable will match the expression:
```markdown
REPH? _vowel_ _nukta_? (_zwj_ | (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL)
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

A standalone syllable will match the expression:
```markdown
((_repha_|_consonantwithstacker_)? _placeholder_ | REPH? _dottedcircle_) _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .

> Note: Although they are labeled as "standalone syllables" here,
> many sequences that match the standalone regular expression above
> are instances where a document needs to display a matra, combining
> mark, or special form in isolation. Such sequences might not have
> any significance with regard to the definition of syllables used in
> the language or orthography of the text.

A symbol-based syllable will match the expression:
```markdown
S SYLLABLE_TAIL
```

A broken syllable will match the expression:
```markdown
REPH? _nukta_? (HALANT_GROUP CN)* MEDIAL_GROUP HALANT_OR_MATRA_GROUP SYLLABLE_TAIL
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than 4 sequential `(HALANT_GROUP CN)`
> instances in any real-word syllables. Thus, implementations may
> choose to limit occurrences by limiting the above expressions to a
> finite length, such as `(HALANT_GROUP CN){0,4}` .


The primary problem involved in shaping broken syllables is the lack
of a syllable base (either a base consonant or an independent
vowel). Without a syllable base, the shaping engine cannot perform
<abbr title="Glyph Positioning table">GPOS</abbr> positioning and other contextual operations that are required
later in the shaping process.

To make up for this limitation, shaping engines should insert a
dotted-circle placeholder (`U+25CC`) character into the text stream
where the missing syllable base was expected to occur. This
placeholder allows the shaping process to proceed on a best-effort
basis at handling the broken-syllable sequence, but making guarantees
about the orthographic correctness or preferred appearance of the
final result is out of scope for this document.

Shaping engines can perform this dotted-circle insertion at any point
after the broken syllable has been recognized and before <abbr title="Glyph Substitution table">GSUB</abbr> features
are applied. However, the best results will likely be attained by
performing the insertion immediately, before proceeding to
stage 2. This will enable the maximum number of <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features
in the active font to be correctly applied to the text run by ensuring
that all reordering, tagging, and sorting algorithms are executed as
usual.

> Note: In software stacks where other text-handling operations, such
> as Unicode normalization and localization, are performed before the
> text run is passed to the shaping engine, there is a potential for
> the dotted-circle insertion to cause unexpected effects.
>
> For example, if a `ccmp` or `locl` feature substitutes the default
> dotted-circle placeholder glyph with a variant glyph of a different
> size or weight for the (`U+25CC`) codepoint, then any shaping engine
> which relies on another software component to handle that
> functionality must take additional care to ensure consistency.


The expressions above use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```


After the syllables have been identified, each of the subsequent 
shaping stages occurs on a per-syllable basis.

### Stage 2: Initial reordering ###

The initial reordering stage is used to relocate glyphs from the
phonetic order in which they occur in a run of text to the
orthographic order in which they are presented visually.

> Note: Primarily, this means moving dependent-vowel (matra) glyphs, 
> <samp>"Ra,Halant,ZWJ"</samp> glyph sequences, and other consonants that take special
> treatment in some circumstances. 
>
> These reordering moves are mandatory. The final-reordering stage
> may make additional moves, depending on the text and on the features
> implemented in the active font.

The syllable should be processed by tagging each glyph with its
intended position based on its ordering category. After all glyphs
have been tagged, the entire syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.

The final sort order of the ordering categories should be:


	POS_RA_TO_BECOME_REPH
	POS_PREBASE_MATRA
	POS_PREBASE_CONSONANT

	POS_SYLLABLE_BASE
	POS_AFTER_MAIN

	POS_ABOVEBASE_CONSONANT

	POS_BEFORE_SUBJOINED
	POS_BELOWBASE_CONSONANT
	POS_AFTER_SUBJOINED

	POS_BEFORE_POST
	POS_POSTBASE_CONSONANT
	POS_AFTER_POST

	POS_FINAL_CONSONANT
	POS_SMVD


This sort order enumerates all of the possible final positions to
which a codepoint might be reordered, across all of the Indic
scripts. It includes some ordering categories not utilized in
Telugu. 

The basic positions (left to right) are <samp>"Reph"</samp>
(`POS_RA_TO_BECOME_REPH`), dependent vowels (matras) and consonants
positioned before the base consonant or syllable base
(`POS_PREBASE_MATRA` and `POS_PREBASE_CONSONANT`), the base consonant
or syllable base (`POS_SYLLABLE_BASE`), above-base consonants
(`POS_ABOVEBASE_CONSONANT`), below-base consonants
(`POS_BELOWBASE_CONSONANT`), consonants positioned after the base
consonant or syllable base (`POS_POSTBASE_CONSONANT`), syllable-final
consonants (`POS_FINAL_CONSONANT`), and syllable-modifying or Vedic
signs (`POS_SMVD`).

In addition, several secondary positions are defined to handle various
reordering rules that deal with relative, rather than absolute,
positioning. `POS_AFTER_MAIN` means that a character must be
positioned immediately after the syllable base. `POS_BEFORE_SUBJOINED`
and `POS_AFTER_SUBJOINED` mean that a character must be positioned
before or after any below-base consonants, respectively. Similarly,
`POS_BEFORE_POST` and `POS_AFTER_POST` mean that a character must be
positioned before or after any post-base consonants, respectively. 

For shaping-engine implementers, the names used for the ordering
categories matter only in that they are unambiguous. 

For a definition of the "base" consonant, refer to stage 2, step 1, which
follows.

#### Stage 2, step 1: Base consonant ####

The first step is to determine the base consonant of the syllable, if
there is one, and tag it as `POS_SYLLABLE_BASE`.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base, and it should be tagged
as `POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a standalone sequence or other syllable that begins with a placeholder
or dotted circle, the placeholder or dotted circle will always serve
as the syllable base, and it should be tagged as
`POS_SYLLABLE_BASE`. The shaping engine can then proceed to step 2.

In a syllable that begins with a consonant, the shaping engine must
determine the base consonant by a script-specific algorithm.

> Note: Shaping engines may choose to treat independent-vowel bases 
> like base consonants for the sake of simplicity or code
> reuse.
>
> However, implementations that take this approach should note
> that removing the distinction between base consonants and
> independent-vowel bases entirely may have unintended
> consequences. Making guarantees about the correctness of the results
> or about language-specific tests is out of scope for this document.

The base consonant is defined as the consonant in a consonant-based
syllable that carries the syllable's vowel sound. That vowel sound
will either be provided by the script's inherent vowel (in which case
it is not written with a separate character) or the sound will be designated
by the addition of a dependent-vowel (matra) sign.


<!--- > Because vowel-based syllables will not include consonants and
> because independent vowels do not take on special forms or require
> reordering, many of the steps that follow will involve no
> work for a vowel-based syllable. However, vowel-based syllables must
> still be sorted and their marks handled correctly, and <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr>
> lookups must be applied. These steps of the shaping process follow
> the same rules that are employed for consonant-based syllables.
--->

While performing the base-consonant search, shaping engines may
also encounter special-form consonants, including below-base
consonants and post-base consonants. Each of these special-form
consonants must also be tagged (`POS_BELOWBASE_CONSONANT`,
`POS_POSTBASE_CONSONANT`, respectively). 

Any pre-base-reordering consonant (such as a pre-base-reordering <samp>"Ra"</samp>)
encountered during the base-consonant search must be tagged
`POS_POSTBASE_CONSONANT`. 
 
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.


The algorithm for determining the base consonant is:

  - If the syllable starts with <samp>"Ra,Halant,ZWJ"</samp>, exclude the starting
    <samp>"Ra"</samp> from the list of consonants to be considered. 
  - Starting from the end of the syllable, move backwards until a consonant is found.
      * If the consonant is the first consonant, stop.
      * If the consonant is preceded by the sequence <samp>"Halant,ZWJ"</samp>, stop.
      * If the consonant has a below-base form, tag it as
        `POS_BELOWBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant has a post-base form, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If the consonant is a pre-base-reordering <samp>"Ra"</samp>, tag it as
        `POS_POSTBASE_CONSONANT`, then move to the previous consonant. 
      * If none of the above conditions is true, stop.
  - The consonant stopped at will be the base consonant.

Telugu includes a pre-base-reordering <samp>"Ra"</samp>.  A <samp>"Halant,Ra"</samp> sequence
after the base consonant or syllable base will be reordered to a pre-base position
during the final-reordering stage.

> Note: It is important to note that all consonants in Telugu have a
> post-base form; therefore, the backwards-search step will
> automatically move past them until it reaches either a <samp>"Ra,Halant"</samp>
> sequence or the first consonant. However, this condition is not the
> same as the shaping characteristic `BASE_POS_FIRST`, which does not
> use the above search algorithm at all.

> Note: Because Telugu employs the `BLWF_MODE_POST_ONLY` shaping
> characteristic, consonants with below-base special forms will occur
> only after the base consonant or syllable base. 
> 
> During the base-consonant search, therefore, all of these below-base
> form sequences will be encountered and tagged correctly as
> <samp>"Halant,_consonant_"</samp> patterns. Stage 2, step 5 below exists to ensure that
> the <samp>"_consonant_,Halant"</samp> pattern preceding the base consonant or syllable base
> for below-base forms in other Indic scripts will also be tagged correctly.


#### Stage 2, step 2: Matra decomposition ####

Second, any multi-part dependent vowels (matras) must be decomposed
into their independent components. Telugu has one
multi-part dependent vowel, "Ai" (`U+0C48`). It has a canonical
decomposition, so this step is unambiguous.

> "Ai" (`U+0C48`) decomposes to "`U+0C46`,`U+0C56`"

Because this decomposition is a character-level operation, the shaping
engine may choose to perform it earlier, such as during an initial
Unicode-normalization stage. However, all such decompositions must be
completed before the shaping engine begins step three, below.

:::{figure-md}
![Two-part matra decomposition](/images/telugu/telugu-matra-decompose.svg "Two-part matra decomposition"){.shaping-demo .inline-svg .greyscale-svg #telugu-matra-decompose}

Two-part matra decomposition
:::

```{svg-color-toggle-button} telugu-matra-decompose
```


#### Stage 2, step 3: Tag matras ####

Third, all dependent-vowel (matra) signs, including those that
resulted from the preceding decomposition step, must be tagged to be
moved to the correct position in the syllable.

Left-side matras should be tagged with `POS_PREBASE_MATRA`.

Above-base matras should be tagged with `POS_BEFORE_SUBJOINED`.

Right-side matras should be tagged according to two rules.

  - Matras <samp>"Sign U"</samp> (`U+0C41`) and <samp>"Sign Uu"</samp> (`U+0C42`) should be
       tagged with `POS_BEFORE_SUBJOINED`.

  - Matras <samp>"Sign Vocalic R"</samp> (`U+0C43`) and <samp>"Sign Vocalic Rr"</samp>
       (`U+0C44`) should be tagged with `POS_AFTER_SUBJOINED`.

Below-base matras should be tagged with `POS_BEFORE_SUBJOINED`.

For simplicity, shaping engines may choose to tag single-part matras
in an earlier text-processing step, using the information in the
_Mark-placement subclass_ column of the character tables. It is
critical at this step, however, that all decomposed matras are also
correctly tagged before proceeding to the next step.

#### Stage 2, step 4: Adjacent marks ####

Fourth, any subsequences of marks that include a <samp>"Nukta"</samp> and a
<samp>"Halant"</samp> or Vedic sign must be reordered so that the <samp>"Nukta"</samp> appears
first.

This means that the subsequence <samp>"Halant,Nukta"</samp> is reordered to
<samp>"Nukta,Halant"</samp> and that the subsequence <samp>"_Vedic_sign_,Nukta"</samp> is
reordered to <samp>"Nukta,_Vedic_sign_"</samp>.

For subsequences of affected marks that are longer than two, the
reordering operation must be repeated until the <samp>"Nukta"</samp> is the first
character in the subsequence. No other marks in the subsequence
should be reordered.

This order is canonical in Unicode and is required so that
<samp>"_consonant_,Nukta"</samp> substitution rules from <abbr title="Glyph Substitution table">GSUB</abbr> will be correctly
matched later in the shaping process.

> Note: Prior to Unicode version 14, the Telugu block did not include
> a "Nukta" mark. However, there are reports of users using the
> "Nukta" from other Indic blocks, so shaping engines may encounter a
> "Nukta" from other scripts in text runs, and should handle the
> situation gracefully.

#### Stage 2, step 5: Pre-base consonants ####

Fifth, consonants that occur before the syllable base must be tagged
with `POS_PREBASE_CONSONANT`. Excluding initial <samp>"Ra,Halant,ZWJ"</samp> sequences
that will become <samp>"Reph"</samp>s: 

  - If the consonant has a below-base form, tag it as
          `POS_BELOWBASE_CONSONANT`. 
  - Otherwise, tag it as `POS_PREBASE_CONSONANT`.
  
> Note: Shaping engines may choose any method to identify consonants that
> have below-base, post-base, or pre-base-reordering forms while
> executing the above algorithm. For example, one implementation may
> choose to maintain a static table of special-form consonants to
> compare against the text run. Another implementation might examine
> the active font to see if it includes a `blwf`, `pstf`, or `pref`
> lookup in the <abbr title="Glyph Substitution table">GSUB</abbr> table that affects the consonants encountered in
> the syllable.
>
> However, checking for <abbr title="Glyph Substitution table">GSUB</abbr> support ensures that the expected
> behavior is implemented in the active font, and is therefore the
> most reliable approach.

Telugu does not use any pre-base consonants; this step is listed here
because it is part of the general processing scheme for shaping Indic scripts.

> Note: Because Telugu employs the `BLWF_MODE_POST_ONLY` shaping
> characteristic, consonants with below-base special forms will occur
> only after the base consonant or syllable base. 
> 
> During the base-consonant search in stage 2, step 1, therefore, all of these below-base
> form sequences will be encountered and tagged correctly as
> <samp>"Halant,_consonant_"</samp> patterns. The tagging in this step ensures that
> the <samp>"_consonant_,Halant"</samp> pattern preceding the base consonant or syllable base
> for below-base forms in other Indic scripts will also be tagged correctly.

#### Stage 2, step 6: Reph ####

Sixth, initial <samp>"Ra,Halant,ZWJ"</samp> sequences that will become <samp>"Reph"</samp>s must be tagged with
`POS_RA_TO_BECOME_REPH`.

> Note: an initial <samp>"Ra,Halant,ZWJ"</samp> sequence will always become a <samp>"Reph"</samp>.

#### Stage 2, step 7: Final consonants ####

Seventh, all final consonants must be tagged. Consonants that occur
after the syllable base _and_ after a dependent vowel (matra) sign
must be tagged with  `POS_FINAL_CONSONANT`.

> Note: Final consonants occur only in Sinhala and should not be
> expected in `<tel2>` text runs. This step is included here to
> maintain compatibility across Indic scripts.


#### Stage 2, step 8: Mark tagging ####

Eighth, all marks must be tagged. 

> Note: In this step, joiner and non-joiner characters must also be
> tagged according to the same rules given for marks, even though
> these characters are not categorized as marks in Unicode.

Marks in the `BINDU`, `VISARGA`, `AVAGRAHA`, `CANTILLATION`,
`SYLLABLE_MODIFIER`, `GEMINATION_MARK`, and `SYMBOL` categories should
be tagged with `POS_SMVD`. 

All <samp>"Nukta"</samp>s must be tagged with the same positioning tag as the
preceding consonant, independent vowel, placeholder, or dotted circle.

All remaining marks (not in the `POS_SMVD` category and not <samp>"Nukta"</samp>s)
must be tagged with the same positioning tag as the closest non-mark
character the mark has affinity with, so that they move together 
during the sorting step.

There are two possible cases: those marks before the syllable base
and those marks after the syllable base. In addition, an exception is
made for <samp>"Halant"</samp> marks that follow a left-side (pre-base) matra.

  1. Initially, all remaining marks should be tagged with the same
	 positioning tag as the closest preceding consonant.

  2. For each consonant after the syllable base (such as post-base
	 consonants, below-base consonants, or final consonants), all
	 remaining marks located between that current consonant and any
	 previous consonant should be tagged with the same positioning tag as
	 the current (later) consonant.
  
     In other words, all consonants preceding the syllable base "own" the
	 marks that follow them, while all consonants after the syllable base
	 "own" the marks that come before them. When a syllable does not have
	 any consonants after the syllable base, the syllable base should
	 "own" all the marks that follow it.
  
  3. Finally, <samp>"Halant"</samp> marks that follow a left-side dependent vowel
     (matra) should _not_ be tagged with the left-side matra's
     positioning tag. Instead, the <samp>"Halant"</samp> should be tagged with the
     positioning tag of the non-mark character preceding the left-side
     matra. This prevents the <samp>"Halant"</samp> mark from being moved with the
     left-side matra when the syllable is sorted.


<!--- HarfBuzz also tags everything between a post-base consonant or -->
<!--matra and another post-base consonant as belonging to the latter -->
<!--post-base consonant. --->


#### Stage 2, step 9: Sort syllable ####

With these steps completed, the syllable can be sorted into the final
sort order as listed at the beginning of stage 2.

The glyphs in the syllable should be sorted in stable order,
so that glyphs of the same ordering category remain in the same
relative position with respect to each other.


#### Stage 2, step 10: Flag sequences for possible feature applications ####

With the initial reordering complete, those glyphs in the syllable that
may have <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features applied in stages 3, 5, and 6 should be
flagged for each potential feature. 

This flagging is preliminary; the set of potential features varies
between different scripts and which features are supported varies
between fonts. It is also possible that the application of
one feature on a glyph sequence will perform a substitution that makes
a later feature no longer applicable to the updated sequence.

Consequently, the flagging must be completed before shaping proceeds
to the stages during which features are applied.

Some shaping features, such as `locl`, can potentially apply to any
glyphs. Therefore it is not necessary to maintain a separate flag for
these features in the bitmask (or other data structure) used to track
the flags -- although shaping engines may do so if desired.

The sequences to flag are summarized in the list below; a full
description of each feature's function and interpretation is provided
in the <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> application stages that follow.

  - `nukt` should match <samp>"_Consonant_,Nukta"</samp> sequences
  - `akhn` should match <samp>"Ka,Halant,Ssa"</samp>
  - `rphf` should match initial <samp>"Ra,Halant,ZWJ"</samp> sequences
  - `pref` should match  <samp>"_Consonant_,Ra"</samp> sequences in
            post-base position
  - `blwf` should match <samp>"Halant,_Consonant_"</samp> in post-base positions
  - `half` should match <samp>"_Consonant_,Halant"</samp> in pre-base position but
           _not_ match <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequences
  - `pstf` should match <samp>"Halant,_Consonant_"</samp> in post-base position
  - `cjct` should match <samp>"_Consonant_,Halant,_Consonant_"</samp> but _not_
            match <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
            <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp>


### Stage 3: Applying the basic substitution features from <abbr>GSUB</abbr> ###

The basic-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should have been flagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2, step 10.

The order in which these substitutions must be performed is fixed for
all Indic scripts:

	locl
	nukt
	akhn
	rphf 
	rkrf (not used in Telugu)
	pref
	blwf 
	abvf (not used in Telugu)
	half
	pstf
	vatu (not used in Telugu)
	cjct
	cfar (not used in Telugu)

#### Stage 3, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

#### Stage 3, step 2: nukt ####

The `nukt` feature replaces <samp>"_Consonant_,Nukta"</samp> sequences with a
precomposed nukta-variant of the consonant glyph. 

  - The context defined for a `nukt` feature is:

:::{table} `nukt` feature context
    
| Backtrack     | Matching sequence             | Lookahead     |
|:--------------|:------------------------------|:--------------|
| _none_        | `_consonant_`(full),`_nukta_` | _none_        |
:::


:::{figure-md}
![Nukta form ligation](/images/telugu/telugu-nukt.svg "Nukta form ligation"){.shaping-demo .inline-svg .greyscale-svg #telugu-nukt}

Nukta form ligation
:::

```{svg-color-toggle-button} telugu-nukt
```


#### Stage 3, step 3: akhn ####

The `akhn` feature replaces specific sequences with required ligatures. 

  - <samp>"Ka,Halant,Ssa"</samp> is substituted with the <samp>"KSsa"</samp> ligature. 
  
These sequences can occur anywhere in a syllable. The characters have
orthographic status equivalent to full consonants in some languages,
and fonts may have `cjct` substitution rules designed to match them in
subsequences. Therefore, this feature must be applied before all other
many-to-one substitutions. 

  - The context defined for an `akhn` feature is:

:::{table} `akhn` feature context
    
| Backtrack     | Matching sequence           | Lookahead     |
|:--------------|:----------------------------|:--------------|
| _none_        | `AKHAND_CONSONANT_SEQUENCE` | _none_        |
:::


:::{figure-md}
![KSsa ligation](/images/telugu/telugu-akhn-kssa.svg "KSsa ligation"){.shaping-demo .inline-svg .greyscale-svg #telugu-akhn-kssa}

KSsa ligation
:::

```{svg-color-toggle-button} telugu-akhn-kssa
```


#### Stage 3, step 4: rphf ####

The `rphf` feature replaces initial <samp>"Ra,Halant,ZWJ"</samp> sequences with the
<samp>"Reph"</samp> glyph.
	

  - The context defined for a `rphf` feature is:

:::{table} `rphf` feature context
    
| Backtrack        | Matching sequence           | Lookahead     |
|:-----------------|:----------------------------|:--------------|
| `SYLLABLE_START` | "Ra"(full),`_halant_`,"ZWJ" | _none_        |
:::


:::{figure-md}
![Reph formation](/images/telugu/telugu-rphf.svg "Reph formation"){.shaping-demo .inline-svg .greyscale-svg #telugu-rphf}

Reph formation
:::

```{svg-color-toggle-button} telugu-rphf
```


#### Stage 3, step 5: rkrf ####

> This feature is not used in Telugu.

#### Stage 3, step 6: pref ####

The `pref` feature replaces pre-base-reordering consonant glyphs with
any special forms. Telugu includes one such reordering consonant,
<samp>"Ra"</samp> when it occurs in post-base position.

The substitution of the nominal glyph for its special form takes place
at this stage. However, the actual reordering move is performed later,
in stage 4, step 4.

#### Stage 3, step 7: blwf ####

The `blwf` feature replaces below-base-consonant glyphs with any
special forms. All consonants in Telugu can take on a below-base consonant
form.

:::{figure-md}
![Below-base form composition](/images/telugu/telugu-blwf.svg "Below-base form composition"){.shaping-demo .inline-svg .greyscale-svg #telugu-blwf}

Below-base form composition
:::

```{svg-color-toggle-button} telugu-blwf
```


#### Stage 3, step 8: abvf ####

> This feature is not used in Telugu.

#### Stage 3, step 9: half ####

The `half` feature replaces <samp>"_Consonant_,Halant"</samp> sequences before the
base consonant or syllable base with "half forms" of the consonant
glyphs.

In the most common case, this substitution applies to
<samp>"_Consonant_,Halant"</samp> sequences that are followed by another
_Consonant_.

In addition, a sequence matching <samp>"_Consonant_,Halant,ZWJ"</samp> must also be
flagged for potential `half` substitutions.

> Note: The presence of the <samp>"ZWJ"</samp> at the end of the sequence means
> that the sequence may match the regular-expression test in stage 1
> as the end of a syllable, even without being followed by a base
> consonant or syllable base.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.

There are two exceptions to the default behavior, for which the
shaping engine must test:

  - Initial <samp>"Ra,Halant"</samp> sequences, which should have been flagged for
    the `rphf` feature earlier, must not be flagged for potential
    `half` substitutions.

  - A sequence matching <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be
    flagged for potential `half` substitutions.

> Note: Telugu does not usually incorporate half forms, but it is
> possible for a font to implement them in order to provide for
> desired typographic variation.

:::{figure-md}
![Half form composition](/images/telugu/telugu-half.svg "Half form composition"){.shaping-demo .inline-svg .greyscale-svg #telugu-half}

Half form composition
:::

```{svg-color-toggle-button} telugu-half
```


#### Stage 3, step 10: pstf ####

The `pstf` feature replaces post-base-consonant glyphs with any special forms.


#### Stage 3, step 11: vatu ####

> This feature is not used in Telugu.

#### Stage 3, step 12: cjct ####

The `cjct` feature replaces sequences of adjacent consonants with
conjunct ligatures. These sequences must match <samp>"_Consonant_,Halant,_Consonant_"</samp>.

A sequence matching <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> or
<samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> must not be flagged to form a conjunct.

> Note: The presence of the <samp>"ZWJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> sequence should automatically
> inhibit any `cjct` feature rules from matching the sequence as valid
> input, and thus prevent the `cjct` substitution from being applied.

> Note: The presence of the <samp>"ZWNJ"</samp> in a
> <samp>"_Consonant_,Halant,ZWNJ,_Consonant_"</samp> sequence means that the
> <samp>"_Consonant_,Halant,ZWNJ"</samp> subsequence will match the
> regular-expression test in stage 1 as the end of a syllable.
> 
> Because OpenType shaping features in `<tel2>` are defined as
> applying only within an individual syllable, this means that the
> presence of the <samp>"ZWNJ"</samp> will automatically prevent the application of
> a `cjct` feature by triggering the identification of a syllable
> break between the two consonants.
>
> The fact that the regular-expression tests identify a syllable break
> after the <samp>"_Consonant_,Halant,ZWNJ"</samp> is a byproduct of OpenType
> shaping and Unicode encoding, however, and might not have any
> significance with regard to the definition of syllables used in the
> language or orthography of the text.
>
> Note, also: The presence of the <samp>"ZWJ"</samp> means that a
> <samp>"_Consonant_,Halant,ZWJ"</samp> sequence may match the regular-expression
> test in stage 1 as the end of a syllable, even without being
> followed by a base consonant or syllable base. By definition,
> however, a <samp>"_Consonant_,Halant,ZWJ"</samp> syllable identified in stage 1
> cannot also include a <samp>"_Consonant_"</samp> after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.

The font's <abbr title="Glyph Substitution table">GSUB</abbr> rules might be implemented so that `cjct`
substitutions apply to half-form consonants; therefore, this feature
must be applied after the `half` feature. 

> Note: Telugu does not usually incorporate conjuncts, but it is
> possible for a font to implement the `cjct` feature in order to
> provide for desired typographic variation.


#### Stage 3, step 13: cfar ####

> This feature is not used in Telugu.


### Stage 4: Final reordering ###

The final reordering stage repositions marks, dependent-vowel (matra)
signs, and <samp>"Reph"</samp> glyphs to the appropriate location with respect to
the base consonant or syllable base. Because multiple substitutions
may have occurred during the application of the basic-shaping features
in the preceding stage, these repositioning moves could not be
performed during the initial reordering stage.

Like the initial reordering stage, the steps involved in this stage
occur on a per-syllable basis.

<!--- Check that classifications have not been mangled. If the -->
<!--character is a Halant AND a ligature was formed AND a multiple
substitution was performed, restore the classification to VIRAMA
because it was almost certainly lost in the preceding <abbr title="Glyph Substitution table">GSUB</abbr> stage.
--->

#### Stage 4, step 1: Base consonant ####

The final reordering stage, like the initial reordering stage, begins
with determining the syllable base of each syllable, following the
same algorithm used in stage 2, step 1.

In a syllable that begins with an independent vowel, the independent
vowel will always serve as the syllable base. In a standalone sequence or
other syllable that begins with a placeholder or a dotted circle, the
placeholder or dotted circle will always serve as the syllable base.

In a syllable that begins with a consonant, the shaping engine must
repeat the base-consonant search algorithm used in stage 2, step 1.

The codepoint of the underlying base consonant or syllable base will
not change between the search performed in stage 2, step 1, and the
search repeated here. However, the application of <abbr title="Glyph Substitution table">GSUB</abbr> shaping
features in stage 3 means that several ligation and many-to-one
substitutions may have taken place. The final glyph produced by that
process may, therefore, be a conjunct or ligature form — in most
cases, such a glyph will not have an assigned Unicode codepoint.
   
#### Stage 4, step 2: Pre-base matras ####

Pre-base dependent vowels (matras) that were reordered during the
initial reordering stage must be moved to their final position. This
position is defined as:
   
   - after the last standalone <samp>"Halant"</samp> glyph that comes after the
     matra's starting position and also comes before the main
     consonant.
   - If a zero-width joiner follows this last standalone <samp>"Halant"</samp>, the
     final matra position is moved to after the joiner.

This means that the matra will move to the right of all explicit
<samp>"consonant,Halant"</samp> subsequences, but will stop to the left of the base
consonant or syllable base, all conjuncts or ligatures that contain
the base consonant or syllable base, and all half forms.

> Note: OpenType and Unicode both state that if the syllable includes
> a <abbr title="Zero-Width Joiner">ZWJ</abbr> immediately after the last <samp>"Halant"</samp>, then the final matra
> position should be after the <abbr title="Zero-Width Joiner">ZWJ</abbr>.
>
> However, there are several test sequences indicating that
> Microsoft's Uniscribe shaping engine did not follow this rule (in,
> at least, Devanagari and Bengali text), and in these circumstances
> Uniscribe instead makes the final matra position before the final
> <samp>"Consonant,Halant,ZWJ"</samp>.
>
> Subsequently, the HarfBuzz shaping engine has also followed the same
> pattern. If other shaping engine implementations prefer to maintain
> maximum compatibility with Uniscribe and HarfBuzz, then they should
> also follow suit.

> Note: The Microsoft script-development specifications for OpenType
> shaping also state that if a zero-width non-joiner follows the last
> standalone <samp>"Halant"</samp>, the final matra position is moved to after the
> non-joiner. However, it is unnecessary to test for this condition,
> because a <samp>"Halant,ZWNJ"</samp> subsequence is, by definition, the end of a
> syllable. Consequently, a <samp>"Halant,ZWNJ"</samp> cannot be followed by a
> pre-base dependent vowel.


#### Stage 4, step 3: Reph ####

<samp>"Reph"</samp> must be moved from the beginning of the syllable to its final
position. Because Telugu incorporates the `REPH_POS_AFTER_POST`
shaping characteristic, this final position is immediately after 
any post-base consonant forms.


The algorithm for finding the final <samp>"Reph"</samp> position is:

  - Move the <samp>"Reph"</samp> to the position immediately before
    the first post-base matra, syllable modifier, or Vedic sign that
    has a positioning tag after the script's <samp>"Reph"</samp> position in the
    syllable sort order (as listed in [stage
    2](#stage-2-initial-reordering)). This will be the final <samp>"Reph"</samp>
    position. 
    > Note: Because Telugu incorporates the
    > `REPH_POS_AFTER_POST` shaping characteristic, this means
    > any positioning tag of `POS_FINAL_CONSONANT` or later,
    > although a post-base matra, syllable modifier, or Vedic sign
    > would not typically be tagged with `POS_FINAL_CONSONANT`.
  - If no other location has been located in the previous step, move
    the <samp>"Reph"</samp> to the end of the syllable.


Finally, if the final position of <samp>"Reph"</samp> occurs after a
<samp>"_matra_,Halant"</samp> subsequence, then <samp>"Reph"</samp> must be repositioned to the
left of <samp>"Halant"</samp>, to allow for potential matching with `abvs` or
`psts` substitutions from <abbr title="Glyph Substitution table">GSUB</abbr>.

#### Stage 4, step 4: Pre-base-reordering consonants ####

Any pre-base-reordering consonants must be moved to before
the base consonant or syllable base.
  
Telugu includes one such reordering consonant. <samp>"Ra"</samp> occurring in the
post-base position is reordered to a pre-base position at this step.

The algorithm for reordering <samp>"Ra"</samp> in this circumstance is:

  - Only reorder the <samp>"Ra"</samp> if the current glyph was substituted using
    the `pref` feature in stage 3, step 6.
  - Select the final position using [the same method](#stage-4-step-2-pre-base-matras) as used for
    reordering a pre-base matra.
  - If the pre-base matra positioning algorithm cannot determine the final
    position, place the <samp>"Ra"</samp> immediately before the base consonant or syllable base.


#### Stage 4, step 5: Initial matras ####

Any left-side dependent vowels (matras) that are at the start of a
word must be flagged for potential substitution by the `init` feature
of <abbr title="Glyph Substitution table">GSUB</abbr>.

Telugu does not use the `init` feature, so this step will
involve no work when processing `<tel2>` text. It is included here in
order to maintain compatibility with the other Indic scripts.


### Stage 5: Applying all remaining substitution features from <abbr>GSUB</abbr> ###

In this stage, the remaining substitution features from the <abbr title="Glyph Substitution table">GSUB</abbr> table
are applied. In preparation for this stage, glyph sequences should be
flagged for possible application of <abbr title="Glyph Substitution table">GSUB</abbr> features in stage 2,
step 10.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	init (not used in Telugu)
	pres
	abvs
	blws
	psts
	haln

The `init` feature is not used in Telugu.

The `pres` feature replaces pre-base-consonant glyphs with special
presentation forms. This can include consonant conjuncts, half-form
consonants, and stylistic variants of left-side dependent vowels
(matras). 

The `abvs` feature replaces above-base-consonant glyphs with special
presentation forms. This usually includes contextual variants of
above-base marks or contextually appropriate mark-and-base ligatures.

:::{figure-md}
![Above-base form ligation](/images/telugu/telugu-abvs.svg "Above-base form ligation"){.shaping-demo .inline-svg .greyscale-svg #telugu-abvs}

Above-base form ligation
:::

```{svg-color-toggle-button} telugu-abvs
```

The `blws` feature replaces below-base-consonant glyphs with special
presentation forms. This usually involves replacing multiple
below-base glyphs (substituted earlier with the `blwf` feature) with
ligatures or conjunct forms.

:::{figure-md}
![Below-base form ligation](/images/telugu/telugu-blws.svg "Below-base form ligation"){.shaping-demo .inline-svg .greyscale-svg #telugu-blws}

Below-base form ligation
:::

```{svg-color-toggle-button} telugu-blws
```

The `psts` feature replaces post-base-consonant glyphs with special
presentation forms. This usually includes replacing right-side
dependent vowels (matras) with stylistic variants or replacing
post-base-consonant/matra pairs with contextual ligatures. 

:::{figure-md}
![Post-base form ligation](/images/telugu/telugu-psts.svg "Post-base form ligation"){.shaping-demo .inline-svg .greyscale-svg #telugu-psts}

Post-base form ligation
:::

```{svg-color-toggle-button} telugu-psts
```

The `haln` feature replaces syllable-final <samp>"_Consonant_,Halant"</samp> pairs with
special presentation forms. This can include stylistic variants of the
consonant where placing the <samp>"Halant"</samp> mark on its own is
typographically problematic. 

:::{figure-md}
![Halant form ligation](/images/telugu/telugu-haln.svg "Halant form ligation"){.shaping-demo .inline-svg .greyscale-svg #telugu-haln}

Halant form ligation
:::

```{svg-color-toggle-button} telugu-haln
```

> Note: The `calt` feature, which allows for generalized application
> of contextual alternate substitutions, is usually applied at this
> point. However, `calt` is not mandatory for correct Telugu shaping
> and may be disabled in the application by user preference.


### Stage 6: Applying remaining positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied.

As with the preceding stage, the order in which these features are
applied is not canonical; they should be applied in the order in which
they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

        dist
        abvm
        blwm

> Note: The `kern` feature is usually applied at this stage, if it is
> present in the font. However, `kern` (like `calt`, above) is not
> mandatory for shaping Telugu text and may be disabled by user preference.

The `dist` feature adjusts the horizontal positioning of
glyphs. Unlike `kern`, adjustments made with `dist` do not require the
application or the user to enable any software _kerning_ features, if
such features are optional. 

:::{figure-md}
![Distance positioning](/images/telugu/telugu-dist.svg "Distance positioning"){.shaping-demo .inline-svg .greyscale-svg #telugu-dist}

Distance positioning
:::

```{svg-color-toggle-button} telugu-dist
```

The `abvm` feature positions above-base marks for attachment to base
characters. In Telugu, this includes above-base dependent vowels (matras),
diacritical marks, and Vedic signs. 

:::{figure-md}
![Above-base mark positioning](/images/telugu/telugu-abvm.svg "Above-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #telugu-abvm}

Above-base mark positioning
:::

```{svg-color-toggle-button} telugu-abvm
```

The `blwm` feature positions below-base marks for attachment to base
characters. In Telugu, this includes below-base dependent vowels
(matras) as well as below-base diacritical marks.

:::{figure-md}
![Below-base mark positioning](/images/telugu/telugu-blwm.svg "Below-base mark positioning"){.shaping-demo .inline-svg .greyscale-svg #telugu-blwm}

Below-base mark positioning
:::

```{svg-color-toggle-button} telugu-blwm
```


## The `<telu>` shaping model ##

The older Telugu script tag, `<telu>`, has been deprecated. However,
shaping engines may still encounter fonts that were built to work with
`<telu>` and some users may still have documents that were written to
take advantage of `<telu>` shaping.

### Distinctions from `<tel2>` ###

The most significant distinction between the shaping models is that the
sequence of <samp>"Halant"</samp> and consonant glyphs used to trigger shaping
features was altered when migrating from `<telu>` to
`<tel2>`. 

Specifically, shaping engines were expected to reorder post-base
<samp>"Halant,_Consonant_"</samp> sequences to <samp>"_Consonant_,Halant"</samp>.

As a result, a font's <abbr title="Glyph Substitution table">GSUB</abbr> substitutions would be written to match
<samp>"_Consonant_,Halant"</samp> sequences in all pre-base and post-base positions.


The `<telu>` syllable

	Pre-baseC Halant BaseC Halant Post-baseC

would be reordered to

	Pre-baseC Halant BaseC Post-baseC Halant

before features are applied.

In `<tel2>` text, as described above in this document, there is no
such reordering. The correct sequence to match for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions is
<samp>"_Consonant_,Halant"</samp> for pre-base consonants, but <samp>"Halant,_Consonant_"</samp>
for post-base consonants.

In addition, for some scripts, left-side dependent vowel marks
(matras) were not repositioned during the final reordering
stage. For `<telu>` text, the left-side matra was always positioned
at the beginning of the syllable.


### Advice for handling fonts with `<telu>` features only ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences in order to apply <abbr title="Glyph Substitution table">GSUB</abbr> substitutions when it is known that
the font in use supports only the `<telu>` shaping model.

### Advice for handling text runs composed in `<telu>` format ###

Shaping engines may choose to match post-base <samp>"_Consonant_,Halant"</samp>
sequences for <abbr title="Glyph Substitution table">GSUB</abbr> substitutions or to reorder them to
<samp>"Halant,_Consonant_"</samp> when processing text runs that are tagged with
the `<telu>` script tag and it is known that the font in use supports
only the `<tel2>` shaping model.

Shaping engines may also choose to position left-side matras according
to the `<telu>` ordering scheme; however, doing so might interfere
with matching <abbr title="Glyph Substitution table">GSUB</abbr> or <abbr title="Glyph Positioning table">GPOS</abbr> features.


================================================
FILE: opentype-shaping-thai-lao.md
================================================
```{include} /_global.md
```

# Thai and Lao shaping in OpenType #

This document details the shaping procedure needed to display text
runs in the Thai and Lao scripts.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes and subclasses](#shaping-classes-and-subclasses)
      - [Mark combining classes](#mark-combining-classes)
      - [<abbr>PUA</abbr> fallback classifications](#pua-fallback-classifications)
      - [Thai and Lao character tables](#thai-and-lao-character-tables)
  - [The `<thai>`/`<lao >` shaping model](#the-thailao-shaping-model)
      - [Stage 1: Applying the language substitution features from <abbr>GSUB</abbr>](#stage-1-applying-the-language-substitution-features-from-gsub)
      - [Stage 2: Decomposing all Am vowel signs](#stage-2-decomposing-all-am-vowel-signs)
      - [Stage 3: Reordering sequences of marks](#stage-3-reordering-sequences-of-marks)
      - [Stage 4: Applying all positioning features from <abbr>GPOS</abbr>](#stage-4-applying-all-positioning-features-from-gpos)
  - [The <abbr>PUA</abbr> fallback shaping model](#the-pua-fallback-shaping-model)
      - [Contextual replacement rules](#contextual-replacement-rules)
      - [Stage 1: Decomposing all Am vowel signs](#stage-1-decomposing-all-am-vowel-signs)
      - [Stage 2: Reordering sequences of marks](#stage-2-reordering-sequences-of-marks)
      - [Stage 3: Remapping codepoints to the appropriate <abbr>PUA</abbr> alternates](#stage-3-remapping-codepoints-to-the-appropriate-pua-alternates)


## General information ##

The Thai and Lao scripts are both descendants of the Brahmi script,
and follow many of the same general patterns found in [Indic
scripts](opentype-shaping-indic-general.md). They are distinct enough 
from Indic scripts that they should not be supported by a
general-purpose Indic shaping engine.

Thai and Lao use different alphabets but are historically
related. They share common orthographic conventions and shaping
characteristics, which enables shaping engines to support both scripts
in a single implementation.

The Thai script is used to write multiple languages, most commonly
Thai, Pak Thai (or Southern Thai), Kuy, Isan, Lanna (or Northern
Thai), and Kelantan-Pattani Malay. In addition, the Thai script is
used to write Sanskrit and Pali. However, the Thai script is not used
for Vedic texts; therefore, Thai and Lao text runs are not expected to
include any glyphs from the Vedic Extensions block of Unicode.

The Lao script is used to write multiple languages, most commonly
Lao, Khmu', Hmong, and Isan. 

The Thai script tag defined in OpenType is `<thai>`. The Lao script
tag defined in OpenType is `<lao >`. Because OpenType script tags must
be exactly four letters long, the `<lao >` tag includes a trailing
space. 

A significant number of older Thai fonts that do not use the OpenType
shaping model are still in use; these fonts employ the Unicode
"Private Use Area" (<abbr>PUA</abbr>) to store contextual forms of
characters. Shaping engines may implement this <abbr title="Private Use Area">PUA</abbr>-based shaping model
as a fallback mechanism when such fonts are encountered.


## Terminology ##

OpenType shaping uses a standard set of terms for Brahmi-derived and
Indic scripts.  The terms used colloquially in any particular language
may vary, however, potentially causing confusion.

Both Thai and Lao feature inherent vowels for every consonant, and
employ **dependent vowel** signs to replace the inherent vowel with a
different vowel sound.

The Thai term for a dependent vowel sign is **sara**. The Lao term for
a vowel sign is **sala**. The official names of the Thai vowel signs
in the Unicode standard include "sara" (for example, <samp>"Sara Am"</samp>),
while the official names of the Lao vowel signs use "sign" (for
example, <samp>"Sign Am"</samp>).

Some of these dependent-vowel signs are encoded as marks that attach
to the consonant in **above-base** or **below-base** position. Others
are encoded as full letters that may appear in **pre-base**
(left-side) or **post-base** (right-side) position.

Thai and Lao differ from Indic scripts in that these pre-base dependent
vowels are entered before typing the consonant to which they
apply. Therefore, pre-base dependent vowels do not need to be
reordered by the shaping engine.

**Phinthu** is the term used for the Thai equivalent of the "halant"
or "virama" mark that suppresses the inherent vowel of a consonant. It
is used only when writing Sanskrit or Pali text in the Thai script.

**Nikhahit** is the term for the Thai equivalent of "anusvara". It
is used only when writing Sanskrit or Pali text in the Thai
script. The equivalent mark in Lao is called **niggahita**.

Both Thai and Lao include several **tone markers** as combining marks
that are positioned with respect to the consonant and, possibly, to
any corresponding dependent-vowel marks.

Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.


## Glyph classification ##

Shaping Thai and Lao text depends on the shaping engine correctly
classifying each glyph in the run. As with most other scripts, the
classifications must distinguish between consonants, vowels
(independent and dependent), numerals, punctuation, and various types
of diacritical mark. 

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not always sufficient to fully capture the
expected shaping behavior. Therefore, Thai and Lao glyphs may
additionally be classified by how they are treated when shaping a run
of text.


### Shaping classes and subclasses ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Thai and Lao scripts. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType features that
might affect how the respective glyphs are drawn, such as `tnum`,
which specifies the usage of tabular-width numerals, and `sups`, which
replaces the default glyphs with superscript variants.

Marks, including diacritics, tone markers, and dependent vowels, are further labeled
with a mark-placement subclass, which indicates where the glyph will
be placed with respect to the base character to which it is
attached. The actual position of the glyphs is determined by the
lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr> table.

There are three basic _mark-placement subclasses_ for marks
in Thai and Lao. Each corresponds to the visual position of the mark with
respect to the consonant to which it is attached:

  - `TOP_POSITION` marks are positioned above the consonant.
  - `BOTTOM_POSITION` marks are positioned below the consonant.
  - `RIGHT_POSITION` marks are positioned to the right of the consonant.
  
Thai and Lao vowel marks can also appear to the left of the consonant
to which they are attached. However, in Thai and Lao text runs, these
vowels exist _before_ the consonant — that is, to the left of the
consonant in the character sequence. Thus, no reordering of these
vowels (as is done in several other Brahmi-derived scripts) is
required for Thai or Lao.

In order to unambiguously distinguish between this non-reordering
convention and the reordering conventions of other scripts, the
left-side vowels are not designated `LEFT_POSITION` in their
mark-placement subclass. Instead, these vowels are classified as `VISUAL_ORDER_LEFT`.

These positions may also be referred to elsewhere in shaping documents as:

  - _Above-base_ 
  - _Below-base_ 
  - _Post-base_
  - _Pre-base_
  
respectively. The `VISUAL_ORDER_LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there may be some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 


### Mark combining classes ###

The Unicode standard defines a _canonical combining class_ for each mark
codepoint that is used whenever a sequence of marks needs to be sorted
into canonical order. 

The numeric values of these combining classes are used during Unicode
normalization. 

All Thai and Lao marks belong to standard combining classes. However,
for script-shaping purposes, some marks need to be reassigned to a
modified class in order to ensure that certain sequences of
consecutive marks are reordered correctly.

In particular, the Thai <samp>"Sara U"</samp> (`U+0E38`) and <samp>"Sara Uu"</samp> (`U+0E39`)
marks are reassigned from the canonical class 103 to the class 3
(which is an unused class in Unicode's set of canonical classes).

This ensures that <samp>"Sara U"</samp> or <samp>"Sara Uu"</samp> codepoints adjacent to
<samp>"Phinthu"</samp> (`U+0E3A`) are not reordered to a position after the
<samp>"Phinthu"</samp> mark.


:::{table} Mark-classification table

| Codepoint | Combining class | Glyph                              |
|:----------|:----------------|:-----------------------------------|
|`U+0E38`   | 3               | &#x0E38; Sara U                    |
|`U+0E47`   | _0_             | &#x0E47; Maitaikhu                 |
|`U+0E4A`   | 107             | &#x0E4A; Mai Tri                   |
|`U+0EB9`   | 118             | &#x0EB9; Sign Uu                   |
|`U+0EBC`   | _0_             | &#x0EBC; Semivowel Sign Lo         |
|`U+0ECB`   | 122             | &#x0ECB; Tone Mai Catawa           |
:::

> Note: Reassigning marks to modified classes in this manner should
> not produce any unwanted side effects, because the reassigned class
> is unused. However, any implementations that need to maintain strict
> adherence to Unicode's canonical combining classes may choose to
> handle the Phinthu-reordering issue in a different manner.


### <abbr>PUA</abbr> fallback classifications ###

Older Thai fonts that implement the <abbr title="Private Use Area">PUA</abbr>-substitution fallback method
rather than modern OpenType script shaping rules incorporate
subclasses for consonants that indicate whether or not the consonant
includes an ascender, a normal descender, or a removable descender.

There are four possible values:

  - `NORMAL_CONSONANT` or `NC`
  - `ASCENDER_CONSONANT` or `AC`
  - `DESCENDER_CONSONANT` or `DC`
  - `REMOVABLE_DESCENDER_CONSONANT` or `RC`
  
Furthermore, vowels and marks in these fonts are classified by whether
they are positioned at the same baseline as consonants, below
consonants, above consonants, or must be positioned at the top of any
stacks of marks.

There are four possible values:

  - `CONSONANT_BASELINE_LEVEL` or `CV`
  - `BELOW_CONSONANT_LEVEL` or `BV`
  - `ABOVE_CONSONANT_LEVEL` or `AV`
  - `TOP_LEVEL` or `TV`


### Thai and Lao character tables ###

Separate character tables are provided for the Thai and Lao blocks as
well as for other miscellaneous characters that are used in `<thai>`
and `<lao >` text runs: 

  - [Thai character table](character-tables/character-tables-thai.md#thai-character-table)
  - [Miscellaneous character table](character-tables/character-tables-thai.md#miscellaneous-character-table)

  - [Lao character table](character-tables/character-tables-lao.md#lao-character-table)
  - [Miscellaneous character table](character-tables/character-tables-lao.md#miscellaneous-character-table)

The tables list each codepoint along with its Unicode general
category, its shaping class, its mark-placement subclass, and its
<abbr title="Private Use Area">PUA</abbr>-fallback category. The codepoint's Unicode name and an example
glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass | PUA    | Glyph                         |
|:----------|:-----------------|:------------------|:------------------------|:-------|:------------------------------|
|`U+0E01`   | Letter           | CONSONANT         | _null_                  | NC     | &#x0E01; Ko Kai               |
| | | | | | |
|`U+0E48`   | Mark [Mn]        | TONE_MARKER       | TOP_POSITION            | TV     | &#x0E48; Mai Ek               |
| | | | | | |
|`U+0E81`   | Letter           | CONSONANT         | _null_                  | _null_ | &#x0E81; Ko                   |
| | | | | | |
|`U+0EC8`   | Mark [Mn]        | TONE_MARKER       | TOP_POSITION            | _null_ | &#x0EC8; Tone Mai Ek          |
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine.

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

The _PUA_ column indicates which, if any, fallback-shaping category
the codepoint belongs to when found in older fonts using the <abbr title="Private Use Area">PUA</abbr>
fallback shaping scheme. Note that the <abbr title="Private Use Area">PUA</abbr> method was employed only
for Thai fonts, so Lao codepoints do not have a <abbr title="Private Use Area">PUA</abbr> fallback-shaping
category. Thai codepoints with a _null_ in the _PUA_ column were not
used in the <abbr title="Private Use Area">PUA</abbr> fallback-shaping scheme and evoke no special behavior
from the shaping engine.

Some codepoints in the tables use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.

Other important characters that may be encountered when shaping runs
of Thai and Lao text include the dotted-circle placeholder (`U+25CC`), 
the no-break space (`U+00A0`), and the zero-width space (`U+200B`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel sign or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

<!--- The zero-width joiner is primarily used to prevent the formation of a
subjoining form from a <samp>"_Consonant_,Halant,_Consonant_"</samp> sequence. The sequence
<samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> blocks the substitution of a
subjoined form for the second consonant. --->

<!---
A secondary usage of the zero-width joiner is to prevent the formation of
<samp>"Reph"</samp>. An initial <samp>"Ra,Halant,ZWJ"</samp> sequence should not produce a <samp>"Reph"</samp>,
where an initial <samp>"Ra,Halant"</samp> sequence without the zero-width joiner
otherwise would.
--->

The no-break space is primarily used to insert spaces between
phrases. Thai and Lao texts do not employ inter-word spaces. Consequently,
when spaces are inserted into a text run, it is important that they be
preserved: line-breaking algorithms must not break lines after a
Thai or Lao space, so the no-break space character is used instead of the
traditional space. 

The no-break space may also be used to display those codepoints that
are defined as non-spacing (marks, dependent vowels (matras),
below-base consonant forms, and post-base consonant forms) in an
isolated context, as an alternative to displaying them superimposed on
the dotted-circle placeholder. 

## The `<thai>`/`<lao >` shaping model ##

Processing a run of `<thai>` or `<lao >` text involves four top-level stages:


1. Applying the language substitution features from <abbr>GSUB</abbr>
2. Decomposing all Am vowel signs
3. Reordering sequences of marks
4. Applying all positioning features from <abbr>GPOS</abbr>


As with other Brahmi-derived and Indic scripts, the basic substitution
features must be applied to the run in a specific order. The
positioning features in the final stage, however, do not have a
mandatory order.

Unlike many other Brahmi-derived and Indic scripts, shaping Thai and Lao
text does not require a syllable-identification stage.

Each syllable contains exactly one vowel sound. Valid syllables may
begin with either a consonant or an independent vowel. 

In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.


### Stage 1: Applying the language substitution features from <abbr>GSUB</abbr> ###

The language-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

The order in which these substitutions must be performed is fixed:

	locl
	ccmp


#### Stage 1, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.


#### Stage 1, step 2: ccmp ####

The `ccmp` feature allows a font to substitute mark-and-base sequences
with a pre-composed glyph including the mark and the base, or to
substitute a single glyph into an equivalent decomposed sequence of glyphs. 
 
In `<thai>` and `<lao >` text, this may include a decomposition for
the <samp>"Am"</samp> dependent-vowel sign. If such a decomposition is used in the
active font, the shaping engine must keep track of the fact that the
resulting components originated as an <samp>"Am"</samp> sign. 

If there is not an <samp>"Am"</samp> decomposition in the active font's `ccmp`
lookup, the shaping engine will decompose the codepoint in the
following stage.
  
If present, these composition and decomposition substitutions must be
performed before applying any other <abbr title="Glyph Substitution table">GSUB</abbr> lookups, because
those lookups may be written to match only the `ccmp`-substituted
glyphs. 

:::{figure-md}
![Glyph composition](images/thai-lao/thai-ccmp.svg "Glyph composition"){.shaping-demo .inline-svg .greyscale-svg #thai-ccmp}

Glyph composition
:::

```{svg-color-toggle-button} thai-ccmp
```

### Stage 2: Decomposing all Am vowel signs ###

The Thai and Lao alphabets each include one character that must be
decomposed for shaping purposes, the vowel sign <samp>"Am"</samp>. The decomposition is
canonically defined, resulting in the sequence <samp>"_Anusvara_,Sara Aa"</samp> in
the appropriate script. 

  - Thai Sara Am (`U+0E33`) decomposes to <samp>"Nikhahit,Sara Aa"</samp> (`U+0E4D`,`U+0E32`).
  - Lao Sign Am (`U+0EB3`) decomposes to <samp>"Niggahita,Sign Aa"</samp> (`U+0ECD`,`U+0EB2`).

> Note: if the active font decomposed the <samp>"Am"</samp> sign via a `ccmp`
> feature lookup during stage one, then no further action is needed
> on the shaping engine's part during this stage.

The shaping engine must keep track of the fact that the <samp>"Nikhahit"</samp> or
<samp>"Niggahita"</samp> marks originated as part of an <samp>"Am"</samp> sign, because these
decomposed marks are handled differently during the mark-reordering
stage.

:::{figure-md}
![Am decomposition](images/thai-lao/lao-am-decomposition.svg "Am decomposition"){.shaping-demo .inline-svg .greyscale-svg #lao-am-decomposition}

Am decomposition
:::

```{svg-color-toggle-button} lao-am-decomposition
```
  
### Stage 3: Reordering sequences of marks ###

In this stage, sequences of consecutive marks may need to be
reordered.

In `<thai>` and `<lao >` text runs, two conditions should be checked
for possible reordering.

  - A <samp>"Nikhahit"</samp> or <samp>"Niggahita"</samp> mark that originated as part of an
    <samp>"Am"</samp> sign (which was decomposed in stage two, above) must be
    reordered so that it occurs before any tone markers in the
    sequence of marks.
  - A <samp>"Phinthu"</samp> mark must be reordered so that it occurs after any
    <samp>"Sara U"</samp> or <samp>"Sara Uu"</samp> marks.
	
> Note: <samp>"Nikhahit"</samp> or <samp>"Niggahita"</samp> marks that were not originally part
> of an <samp>"Am"</samp> sign should not be reordered.

> Note: Shaping engines may alternatively choose to implement the Phinthu
> reordering rule by modifying the combining classes assigned to
> <samp>"Phinthu"</samp>, <samp>"Sara U"</samp>, and <samp>"Sara Uu"</samp> as necessary before processing
> the text run, or by performing a sorting step at this stage.


<!--- 

move the
   * NIKHAHIT backwards over any tone mark (0E48-0E4B).
   *
   * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>
   *
   * This reordering is legit only when the NIKHAHIT comes from a SARA AM, not
   * when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably
   * not what a user wanted, but the rendering is nevertheless nikhahit above
   * chattawa.
   *
   * Same for Lao.
   *
   * Note:
   *
   * Uniscribe also does some below-marks reordering.  Namely, it positions U+0E3A
   * after U+0E38 and U+0E39.  We do that by modifying the ccc for U+0E3A.
   * See unicode->modified_combining_class ().  Lao does NOT have a U+0E3A
   * equivalent.

--->


### Stage 4: Applying all positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied. The order in which these features are applied is not
canonical; they should be applied in the order in which they appear
in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

	kern
	mark
	mkmk

> Note: The `kern` feature is usually applied at this stage, if it is
> present in the font. However, `kern` is not mandatory for shaping
> Thai and Lao text and may be disabled by user preference.

The `kern` feature adjusts the horizontal positioning of
glyphs.

:::{figure-md}
![Application of the kern feature](/images/thai-lao/lao-kern.svg "Application of the kern feature"){.shaping-demo .inline-svg .greyscale-svg #lao-kern}

Application of the kern feature
:::

```{svg-color-toggle-button} lao-kern
```

The `mark` feature positions marks with respect to base glyphs.

:::{figure-md}
![Application of the mark feature](/images/thai-lao/thai-mark.svg "Application of the mark feature"){.shaping-demo .inline-svg .greyscale-svg #thai-mark}

Application of the mark feature
:::

```{svg-color-toggle-button} thai-mark
```

The `mkmk` feature positions marks with respect to preceding marks,
providing proper positioning for sequences of marks that attach to the
same base glyph.

:::{figure-md}
![Application of the mkmk feature](/images/thai-lao/thai-mkmk.svg "Application of the mkmk feature"){.shaping-demo .inline-svg .greyscale-svg #thai-mkmk}

Application of the mkmk feature
:::

```{svg-color-toggle-button} thai-mkmk
```


## The <abbr>PUA</abbr> fallback shaping model ##

A significant number of older Thai fonts that do not use the OpenType
shaping model are still in use; these fonts employ the Unicode
"Private Use Area" (<abbr>PUA</abbr>) to store contextual forms of
characters.

The <abbr title="Private Use Area">PUA</abbr> shaping model is described at
[linux.thai.net/~thep/th-otf/shaping.html](https://linux.thai.net/~thep/th-otf/shaping.html).
It relies on a set of pre-determined mappings from the codepoints in the
Unicode Thai block to codepoints in the <abbr title="Private Use Area">PUA</abbr>.

For consonants, these alternate-glyph mappings depend on whether or
not the consonant includes an ascender, a normal descender, or a
removable descender.

There are four possible values:

  - `NORMAL_CONSONANT` or `NC`
  - `ASCENDER_CONSONANT` or `AC`
  - `DESCENDER_CONSONANT` or `DC`
  - `REMOVABLE_DESCENDER_CONSONANT` or `RC`
  
Furthermore, vowels and marks in these fonts are classified by whether
they are positioned at the same baseline as consonants, below
consonants, above consonants, or must be positioned at the top of any
stacks of marks.

There are four possible values:

  - `CONSONANT_BASELINE_LEVEL` or `CV`
  - `BELOW_CONSONANT_LEVEL` or `BV`
  - `ABOVE_CONSONANT_LEVEL` or `AV`
  - `TOP_LEVEL` or `TV`


The classifications of the consonant, vowel, and mark characters in
the Thai Block are listed in the _PUA_ column of the [Thai character
table](character-tables/character-tables-thai.md#thai-character-table). 


## Contextual replacement rules ##

Codepoints in the Thai Block can be mapped to one of several alternate
<abbr title="Private Use Area">PUA</abbr> codepoints depending on context:

  - A tone marker that does not follow an above-base vowel sign may be
    mapped to an alternate that is positioned lower, closer to the top
    of the consonant. This is a `SHIFT_DOWN` replacement action.
  - A tone marker, above-base diacritic, or above-base vowel sign
    following a consonant with an ascender may be mapped to an
    alternate that is positioned further to the left (thereby
    preventing a collision with the ascender). This is a `SHIFT_LEFT` replacement action.
  - A below-base vowel sign that follows a consonant with a
    non-removable descender may be mapped to an alternate that is
    positioned lower (thereby preventing a collision with the
    descender). This is a `SHIFT_DOWN` replacement action.
  - A consonant with a removable descender may be mapped to a
    descender-less alternate when the consonant is followed by a
    below-base vowel sign. This is a `REMOVE_DESCENDER` replacement action.
	
The above rules may combine. Specifically, a tone marker that does not
follow an above-base vowel sign _and_ follows a consonant with an
ascender must be positioned lower and further to the left.  This is a
`SHIFT_DOWN_AND_LEFT` replacement action.

Additionally, below-base vowels are handled separately from above-base
vowels and tone markers; a consonant that is followed by a below-base
vowel and a tone marker may have to perform two independent
replacement actions.
	
The following table summarizes the actions taken for each of the
possible consonant (vertical) and vowel/mark (horizontal) sequences:


:::{table} Summary of contextual-replacement rules for <samp>"Consonant,Vowel"</samp> sequences in <abbr>PUA</abbr> fallback

|        |  AV  |  BV  |  TV   |  AV,TV    |
|:-------|:-----|:-----|:------|:-----------|
| **NC** |      |      | `SD`  |            |
| **AC** | `SL` |      | `SDL` | `SL`       |
| **RC** |      | `RD` | `SD`  |            |
| **DC** |      | `SD` | `SD`  |            | 
:::

These replacements take the place of both <abbr title="Glyph Substitution table">GSUB</abbr> substitutions and <abbr title="Glyph Positioning table">GPOS</abbr>
positioning in modern OpenType fonts.

Shaping engines can replace the original codepoints with the
appropriate alternates from the <abbr title="Private Use Area">PUA</abbr> block by testing for the above
conditions. 

With each consonant, vowel, and mark character correctly classified,
the shaping engine can process the text run.

There are three top-level stages:

1. Decomposing all Am vowel signs
2. Reordering sequences of marks
3. Remapping codepoints to the appropriate <abbr>PUA</abbr> alternates


### Stage 1: Decomposing all Am vowel signs ###

The Thai alphabet includes one character that must be decomposed for
shaping purposes, the vowel sign <samp>"Am"</samp>. The decomposition is
canonically defined, resulting in the sequence <samp>"Nikhahit,Sara Aa"</samp>.

  - Sara Am (`U+0E33`) decomposes to <samp>"Nikhahit,Sara Aa"</samp> (`U+0E4D`,`U+0E32`).

The shaping engine must keep track of the fact that the <samp>"Nikhahit"</samp>
mark originated as part of an <samp>"Am"</samp> sign, because these decomposed
marks are handled differently during the mark-reordering stage.

:::{figure-md}
![Glyph decomposition](images/thai-lao/thai-am-decomposition.svg "Glyph decomposition"){.shaping-demo .inline-svg .greyscale-svg #thai-am-decomposition}

Glyph decomposition
:::

```{svg-color-toggle-button} thai-am-decomposition
```

### Stage 2: Reordering sequences of marks ###

In this stage, certain sequences of consecutive marks may need to be
reordered.

As is the case in OpenType-font text runs, two conditions should be checked
for possible reordering.

  - A <samp>"Nikhahit"</samp> mark that originated as part of an <samp>"Am"</samp> sign (which
    was decomposed in stage one, above) must be reordered so that it
    occurs before any tone markers in the sequence of marks.
  - A <samp>"Phinthu"</samp> mark must be reordered so that it occurs after any
    <samp>"Sara U"</samp> or <samp>"Sara Uu"</samp> marks.
	
> Note: <samp>"Nikhahit"</samp> marks that were not originally part of an <samp>"Am"</samp> sign
> should not be reordered.

> Note: Shaping engines may choose to implement the Phinthu
> reordering rule by modifying the combining classes assigned to
> <samp>"Phinthu"</samp>, <samp>"Sara U"</samp>, and <samp>"Sara Uu"</samp> as necessary before processing
> the text run, or by performing a sorting step at this stage.


### Stage 3: Remapping codepoints to the appropriate <abbr>PUA</abbr> alternates ###

The contextual replacement rules described above can be implemented in
a pair of state machines, one for above-base replacement moves and one
for below-base replacement moves.

Each consonant codepoint and subsequent (possibly empty) sequence of
marks should be processed in turn through both machines. The output
for each codepoint will be one of the standard replacement actions:

  - `SD`: replace the codepoint with the `SHIFT_DOWN` alternate
  - `SL`: replace the codepoint with the `SHIFT_LEFT` alternate
  - `SDL`: replace the codepoint with the `SHIFT_DOWN_AND_LEFT` alternate
  - `RD`: replace the codepoint with the `REMOVE_DESCENDER` alternate
  - _null_: no replacement should be made

The above-base state machine tracks four possible states, designated
`AS0` through `AS3`. 

The initial states of the possible codepoints are as follows:

:::{table} Initial states for above-base <abbr>PUA</abbr> remapping

| PUA class | initial state |
|:----------|:--------------|
| NC        | AS0           |
| AC        | AS1           |
| RC        | AS0           |
| DC        | AS0           |
| _Other_   | AS3           |
:::


The following state machine table lists the replacement action to take
and the resulting next state for each possible mark type that may
follow a consonant:


:::{table} State-machine table for above-base <abbr>PUA</abbr> remapping

| Input state | AV         | BV         | TV         |
|:------------|:-----------|:-----------|:-----------|
| AS0         | _null_,AS3 | _null_,AS0 | `SD`,AS3   |
| AS1         | `SL`,AS2   | _null_,AS1 | `SDL`,AS2  |
| AS2         | _null_,AS3 | _null_,AS2 | `SL`,AS3   |
| AS3         | _null_,AS3 | _null_,AS3 | _null_,AS3 |
:::


The below-base state machine tracks three possible states, designated
`BS0` through `BS2`. 

The initial states of the possible codepoints are as follows:

:::{table} Initial states for below-base <abbr>PUA</abbr> remapping

| PUA class | initial state |
|:----------|:--------------|
| NC        | BS0           |
| AC        | BS0           |
| RC        | BS1           |
| DC        | BS2           |
| _Other_   | BS2           |
:::


The following state machine table lists the replacement action to take
and the resulting next state for each possible mark type that may
follow a consonant:

:::{table} State-machine table for below-base <abbr>PUA</abbr> remapping

| Input state | AV         | BV         | TV         |
|:------------|:-----------|:-----------|:-----------|
| BS0         | _null_,BS0 | _null_,BS2 | _null_,BS0 |
| BS1         | _null_,BS1 | `RD`,BS2   | _null_,BS1 |
| BS2         | _null_,BS2 | `SD`,BS2   | _null_,BS2 |
:::

When the necessary replacement action for each codepoint has been
determined, codepoints can be replaced with the <abbr title="Private Use Area">PUA</abbr> codepoints from
the following tables.

Note that Windows fonts and MacOS fonts used different mappings.


#### SD mappings ####

:::{table} `SD` mappings by platform

| Input    | Windows  | MacOS    |
|:---------|:---------|:---------|
| `U+0E48` | `U+F70A` | `U+F88B` |
| `U+0E49` | `U+F70B` | `U+F88E` |
| `U+0E4A` | `U+F70C` | `U+F891` |
| `U+0E4B` | `U+F70D` | `U+F894` |
| `U+0E4C` | `U+F70E` | `U+F897` |
| `U+0E38` | `U+F718` | `U+F89B` |
| `U+0E39` | `U+F719` | `U+F89C` |
| `U+0E3A` | `U+F71A` | `U+F89D` |
:::


#### SL mappings ####

:::{table} `SL` mappings by platform

| Input    | Windows  | MacOS    |
|:---------|:---------|:---------|
| `U+0E48` | `U+F713` | `U+F88A` |
| `U+0E49` | `U+F714` | `U+F88D` |
| `U+0E4A` | `U+F715` | `U+F890` |
| `U+0E4B` | `U+F716` | `U+F893` |
| `U+0E4C` | `U+F717` | `U+F896` |
| `U+0E31` | `U+F710` | `U+F884` |
| `U+0E34` | `U+F701` | `U+F885` |
| `U+0E35` | `U+F702` | `U+F886` |
| `U+0E36` | `U+F703` | `U+F887` |
| `U+0E37` | `U+F704` | `U+F888` |
| `U+0E47` | `U+F712` | `U+F889` |
| `U+0E4D` | `U+F711` | `U+F899` |
:::


#### SDL mappings ####

:::{table} `SDL` mappings by platform

| Input    | Windows  | MacOS    |
|:---------|:---------|:---------|
| `U+0E48` | `U+F705` | `U+F88C` |
| `U+0E49` | `U+F706` | `U+F88F` |
| `U+0E4A` | `U+F707` | `U+F892` |
| `U+0E4B` | `U+F708` | `U+F895` |
| `U+0E4C` | `U+F709` | `U+F898` |
:::


#### RD mappings ####

:::{table} `RD` mappings by platform

| Input    | Windows  | MacOS    |
|:---------|:---------|:---------|
| `U+0E0D` | `U+F70F` | `U+F89A` |
| `U+0E10` | `U+F700` | `U+F89E` |
:::


================================================
FILE: opentype-shaping-tibetan.md
================================================
```{include} /_global.md
```

# Tibetan shaping in OpenType #

This document details the shaping procedure needed to display text
runs in the Tibetan script.


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Shaping classes and subclasses](#shaping-classes-and-subclasses)
      - [Tibetan character tables](#tibetan-character-tables)
  - [The `<tibt>` shaping model](#the-tibt-shaping-model)
      - [Stage 1: Applying the language substitution features from <abbr>GSUB</abbr>](#stage-1-applying-the-language-substitution-features-from-gsub)
      - [Stage 2: Applying all basic substitution features from <abbr>GSUB</abbr>](#stage-2-applying-all-basic-substitution-features-from-gsub)
      - [Stage 3: Applying remaining positioning features from <abbr>GPOS</abbr>](#stage-3-applying-remaining-positioning-features-from-gpos)


## General information ##

The Tibetan script was modeled on seventh-century [Indic
scripts](opentype-shaping-indic-general.md) and incorporates several
patterns and conventions found in Indic scripts. However, Tibetan
developed independently and possesses enough major distinctions that
it is inadvisable to attempt supporting it in a general-purpose
Indic shaping engine. 

The Tibetan script is used to write multiple languages, most commonly
Tibetan, Dzongkha, Sikkimese, Ladakhi, and Balti. In addition,
Sanskrit may be written in Tibetan, but the Tibetan script is not used
for Vedic texts; therefore, Tibetan text runs are not expected to
include any glyphs from the Vedic Extensions block of Unicode. 

The Tibetan script tag defined in OpenType is `<tibt>`. 

Notably, Tibetan was originally included in version 1.0 of the Unicode
standard, encoded in a block that closely mirrored the structure of
the Indic scripts. However, this encoding for Tibetan was removed in
Unicode 1.1. A new encoding for Tibetan was included in version 2.0 of
the Unicode standard, more appropriately structured for the writing
system.

## Terminology ##

OpenType shaping uses a standard set of terms for Brahmi-derived and
Indic scripts.  The terms used colloquially in any particular language
may vary, however, potentially causing confusion.

**Matra** is the standard term for a dependent vowel sign. Syllables
in Tibetan script can include sequences of multiple vowels and,
therefore, multiple matras. Each matra is either an **above-base** or
a **below-base** form.

Several compound matra codepoints are included in the Tibetan Unicode
block. However, these are only used when transcribing Sanskrit
text. Otherwise, Tibetan syllables will include at most one matra.

**Tsheg** or **tsek** is the term for the small, dot-like mark that is placed
between syllables in a Tibetan word. Sequences of tsek marks are
occasionally used to justify lines of text within a block. For
line-breaking purposes, words may be broken after a tsek mark.

**Srog-med** is the term for the "virama" or "halant" sign (`U+0F84`). However,
the Tibetan script does not natively use the srog-med mark: it is
used only when transcribing text in a language that requires a "halant".

<!--- **Chandrabindu** (or simply **Bindu**) is the standard term for the
diacritical mark indicating that the preceding vowel should be
nasalized. Tibetan script does not use a chandrabindu; however, the
_BINDU_ category is used for other marks during the
syllable-identification stage in order to maintain compatibility with
other scripts. --->

<!--- Tibetan has a bindu, but it seems to be there just for
      transcription --->

The term **base consonant** in Tibetan is analogous to its usage in
Indic and Brahmi-derived scripts. The base consonant of a syllable is
rendered in its full form; subsequent consonants are generally shown
in **subjoined** form, stacked below the base consonant.

The Tibetan Unicode block includes separate codepoints for the base
and subjoined forms of each consonant. Therefore, shaping engines are
not required to determine the base consonant of a syllable
algorithmically.

Tibetan also employs the term **head consonant**, which refers to the
consonant in a stack that is in the visually topmost position. Certain
consonants take on an alternate form when used in stack-initial
positions (such as <samp>"Ra"</samp>). When the alternate form is visually the
topmost consonant in the stack, it is regarded as the head consonant,
even though the consonant that follows is regarded as the base
consonant.

For example, the sequence <samp>"Ra,Subjoined Ka"</samp> (`U+0F62`,`U+0F90`) is
rendered with the <samp>"Ka"</samp> in its non-subjoined, base-consonant form and the <samp>"Ra"</samp>
positioned above. In this circumstance, the <samp>"Ra"</samp> would still be
regarded as the head consonant.


Where possible, using the standard terminology is preferred, as the
use of a language-specific term necessitates choosing one language
over all of the others that share a common script.

## Glyph classification ##

Shaping Tibetan text depends on the shaping engine correctly
classifying each glyph in the run. As with most other scripts, the
classifications must distinguish between consonants, vowels
(independent and dependent), numerals, punctuation, and various types
of diacritical mark. 

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not sufficient to fully capture the
expected shaping behavior (such as glyph reordering). Therefore,
Tibetan glyphs must additionally be classified by how they are treated
when shaping a run of text.

### Shaping classes and subclasses ###

The shaping classes listed in the tables that follow are defined so
that they capture the positioning rules used by Tibetan script. 

For most codepoints, the _Shaping class_ is synonymous with the `Indic
Syllabic Category` defined in Unicode. However, there are some
distinctions, where the defined category does not fully capture the
behavior of the character in the shaping process.

Several of the diacritic and syllable-modifying marks behave according
to their own rules and, thus, have a special class. These include
`BINDU` and `VISARGA`. Some less-common marks behave according to
rules that are similar to these common marks, and are therefore
classified with the corresponding common mark.

Letters generally fall into the classes `CONSONANT`,
`VOWEL_INDEPENDENT`, and `VOWEL_DEPENDENT`. These classes help the
shaping engine parse and identify key positions in a syllable. For
example, Unicode categorizes dependent vowels as `Mark [Mn]`, but the
shaping engine must be able to distinguish between dependent vowels
and diacritical marks (which are also categorized as `Mark [Mn]`).

Tibetan uses two subclasses of consonant, `CONSONANT_SUBJOINED` and
`CONSONANT_HEAD`. 

The `CONSONANT_SUBJOINED` subclass is used for consonants immediately
following the base consonant of a syllable and before the vowel
sound. Unlike most Indic scripts, Tibetan explicitly encodes the
subjoined forms of each consonant in a separate codepoint. Therefore,
the shaping engine is not responsible for identifying the base and
below-base consonants (or other special forms) and fonts are not
responsible for implementing substitution features to substitute
subjoined forms in context.

The `CONSONANT_HEAD` subclass is used for special transliteration
letters that are not found in the Tibetan language. They should pass
checks for consonants, but do not evoke special shaping behavior.

Other characters, such as symbols, need no special
attention from the shaping engine, so they are not assigned a shaping
class.

Numbers are classified as `NUMBER`, even though they evoke no special
behavior from the Indic shaping rules, because there are OpenType features that
might affect how the respective glyphs are drawn, such as `tnum`,
which specifies the usage of tabular-width numerals, and `sups`, which
replaces the default glyphs with superscript variants.

Marks, subjoined consonants, and dependent vowels are further labeled
with a mark-placement subclass, which indicates where the glyph will
be placed with respect to the base character to which it is
attached. The actual position of the glyphs is determined by the
lookups found in the font's <abbr title="Glyph Positioning table">GPOS</abbr> table.

There are two basic _mark-placement subclasses_ for dependent vowel signs
(matras). Each corresponds to the visual position of the matra with
respect to the base consonant to which it is attached:

  - `TOP_POSITION` matras are positioned above the base consonant.
  - `BOTTOM_POSITION` matras are positioned below the base consonant.
  
Syllable modifiers and other marks may be placed in `TOP` or `BOTTOM`
position, or:

  - `LEFT_POSITION` marks are positioned to the left of the base consonant.
  - `RIGHT_POSITION` marks are positioned to the right of the base consonant.

These positions may also be referred to elsewhere in shaping documents as:

  - _Above-base_ 
  - _Below-base_ 
  - _Pre-base_ 
  - _Post-base_ 
  
respectively. The `LEFT`, `RIGHT`, `TOP`, and `BOTTOM` designations
correspond to Unicode's preferred terminology. The _Pre_, _Post_,
_Above_, and _Below_ terminology is used in the official descriptions
of OpenType <abbr title="Glyph Substitution table">GSUB</abbr> and <abbr title="Glyph Positioning table">GPOS</abbr> features. Shaping engines may, internally,
use whichever terminology is preferred.

For most mark and dependent-vowel codepoints, the _mark-placement
subclass_ is synonymous with the `Indic Positional Category` defined
in Unicode. However, there are some distinctions, where the defined
category does not fully capture the behavior of the character in the
shaping process. 


### Tibetan character tables ###

Separate character tables are provided for the Tibetan block as well
as for other miscellaneous characters that are used in `<tibt>` text
runs:

  - [Tibetan character table](character-tables/character-tables-tibetan.md#tibetan-character-table)
  - [Miscellaneous character table](character-tables/character-tables-tibetan.md#miscellaneous-character-table)

The tables list each codepoint along with its Unicode general
category, its shaping class, and its mark-placement subclass. The
codepoint's Unicode name and an example glyph are also provided.

For example:

:::{table} Example character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+0F40`   | Letter           | CONSONANT         | _null_                     | &#x0F40; Ka                  |
| | | | |
|`U+0F7E`   | Mark [Mn]        | BINDU             | TOP_POSITION               | &#x0F7E; Sign Rjes Su Nga Ro |
:::


Codepoints with no assigned meaning are
designated as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine.

The _Mark-placement subclass_ column indicates mark-placement
positioning for codepoints in the _Mark_ category. Assigned, non-mark
codepoints have a _null_ in this column and evoke no special
mark-placement behavior. Marks tagged with [Mn] in the _Unicode
category_ column are categorized as non-spacing; marks tagged with
[Mc] are categorized as spacing-combining.

Some codepoints in the tables use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific, script-aware behavior.

Other important characters that may be encountered when shaping runs
of Tibetan text include the dotted-circle placeholder (`U+25CC`), 
the no-break space (`U+00A0`), and the zero-width space (`U+200B`).

The dotted-circle placeholder is frequently used when displaying a
dependent vowel (matra) or a combining mark in isolation. Real-world
text syllables may also use other characters, such as hyphens or dashes,
in a similar placeholder fashion; shaping engines should cope with
this situation gracefully.

<!--- The zero-width joiner is primarily used to prevent the formation of a
subjoining form from a <samp>"_Consonant_,Halant,_Consonant_"</samp> sequence. The sequence
<samp>"_Consonant_,Halant,ZWJ,_Consonant_"</samp> blocks the substitution of a
subjoined form for the second consonant. --->

<!---
A secondary usage of the zero-width joiner is to prevent the formation of
<samp>"Reph"</samp>. An initial <samp>"Ra,Halant,ZWJ"</samp> sequence should not produce a <samp>"Reph"</samp>,
where an initial <samp>"Ra,Halant"</samp> sequence without the zero-width joiner
otherwise would.
--->

The no-break space is primarily used to insert spaces between
phrases. Tibetan text does not employ inter-word spaces. Consequently,
when spaces are inserted into a text run, it is important that they be
preserved: line-breaking algorithms must not break lines after a
Tibetan space, so the no-break space character is used instead of the
traditional space. 

The no-break space may also be used to display those codepoints that
are defined as non-spacing (marks, dependent vowels (matras),
below-base consonant forms, and post-base consonant forms) in an
isolated context, as an alternative to displaying them superimposed on
the dotted-circle placeholder. 

The Wheel of Dharma symbol (`U+2638`) from the Miscellaneous Symbols
block also occurs in Tibetan texts.


## The `<tibt>` shaping model ##

Processing a run of `<tibt>` text involves three top-level stages:

1. Applying the language substitution features from <abbr>GSUB</abbr>
2. Applying all basic substitution features from <abbr>GSUB</abbr>
3. Applying all remaining positioning features from <abbr>GPOS</abbr>


As with other Brahmi-derived and Indic scripts, the language
substitution features must be applied to the run in a specific
order. The basic substitution features and the positioning features in
the later stages, however, do not have a mandatory order.

Unlike many other Brahmi-derived and Indic scripts, shaping Tibetan
text does not require a syllable-identification stage nor any
reordering moves.

A syllable in Tibetan is usually separated from subsequent syllables
or words by a "tsek" mark at the end of the syllable. A word-final
syllable may also be separated by a punctuation mark or a non-breaking
space.

Each syllable contains exactly one vowel sound. Valid syllables may
begin with either a consonant or an independent vowel. 

The general form of a consonant-based syllable in Tibetan begins with
an optional pre-base consonant (also called a "prefix"), followed by
the syllable's base consonant, zero or more subjoined
consonants, zero or more dependent-vowel signs (matras), an optional
post-base consonant (also called a "suffix") and zero or more syllable
modifiers or diacritical marks.

:::{figure-md}
![Tibetan syllable example](/images/tibetan/tibetan-syllable.svg "Tibetan syllable example"){.shaping-demo .inline-svg .greyscale-svg #tibetan-syllable}

Tibetan syllable example
:::

```{svg-color-toggle-button} tibetan-syllable
```

The prefix, suffix, and base consonants will all be from the
`CONSONANT` shaping class. All subjoined consonants will be from the
`CONSONANT_SUBJOINED` class.

The prefix, suffix, and base consonant are all shown in
their default form and position. Any subjoined consonants are stacked
below the base consonant. Any dependent vowel signs (matras) are
rendered as marks positioned either above the base consonant or below
the consonant stack.

> Note: A base consonant that is not accompanied by a
> dependent vowel sign (matra) carries the script's inherent vowel
> sound. This vowel sound is changed by a dependent vowel sign
> following the consonant.

> Note: Prefix and suffix consonants do not carry a vowel sound. This
> does not affect shaping, except in that Tibetan differs from many
> other scripts in not employing a "halant" or vowel-killer sign to
> designate the suppression of these sounds.

Certain consonant sequences may take on alternate shapes to provide a
better visual fit with adjoining characters (such as within a
consonant stack). However, these alternates are not considered
orthographically distinct forms.

Native words in Tibetan do not incorporate more than a single
dependent-vowel sign (matra) in a syllable. However, multiple
dependent-vowel signs may be used to represent loanwords from
Sanskrit, Chinese, and many other languages.

In addition to valid syllables, standalone sequences may occur, such
as when an isolated codepoint is shown in example text.

> Note: Foreign loanwords, when written in the Tibetan script, may
> not adhere to the syllable-formation rules described above. 


### Stage 1: Applying the language substitution features from <abbr>GSUB</abbr> ###

The language-substitution stage applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

The order in which these substitutions must be performed is fixed:

	locl
	ccmp


#### Stage 1, step 1: locl ####

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.


#### Stage 1, step 2: ccmp ####

The `ccmp` feature allows a font to substitute mark-and-base sequences
with a pre-composed glyph including the mark and the base, or to
substitute a single glyph into an equivalent decomposed sequence of glyphs. 
 
In `<tibt>` text, this may include decompositions of multi-part
dependent vowel signs (matras).

The Tibetan Unicode block includes several multi-part matras, most
intended for use transcribing Sanskrit. However, usage is discouraged
for several of these matras, and two of the codepoints have been
officially deprecated. In their place, text authors are encouraged to
use the corresponding sequence of single-part matras.

  - `U+0F77` is deprecated and should be replaced by <samp>"`U+0FB2`,`U+0F81`"</samp>
  - `U+0F79` is deprecated and should be replaced by <samp>"`U+0FB3`,`U+0F81`"</samp>
  - `U+0F73` can be replaced by <samp>"`U+0F71`,`U+0F72`"</samp>
  - `U+0F75` can be replaced by <samp>"`U+0F71`,`U+0F74`"</samp>
  - `U+0F81` can be replaced by <samp>"`U+0F71`,`U+0F80`"</samp>
  
If present, these composition and decomposition substitutions must be
performed before applying any other <abbr title="Glyph Substitution table">GSUB</abbr> lookups, because
those lookups may be written to match only the `ccmp`-substituted
glyphs. 


:::{figure-md}
![Composition-decomposition substitution](/images/tibetan/tibetan-ccmp.svg "Composition-decomposition substitution"){.shaping-demo .inline-svg .greyscale-svg #tibetan-ccmp}

Composition-decomposition substitution
:::

```{svg-color-toggle-button} tibetan-ccmp
```


### Stage 2: Applying all basic substitution features from <abbr>GSUB</abbr> ###

In this stage, the basic substitution features from the <abbr title="Glyph Substitution table">GSUB</abbr> table
are applied. The order in which these features are applied is not
canonical; they should be applied in the order in which they appear in
the <abbr title="Glyph Substitution table">GSUB</abbr> table in the font. 

	abvs
	blws
	calt
	liga


The `abvs` feature replaces above-base-consonant glyphs with special
presentation forms. This usually includes contextual variants of
above-base marks or contextually appropriate mark-and-base ligatures.

:::{figure-md}
![Application of the abvs feature](/images/tibetan/tibetan-abvs.svg "Application of the abvs feature"){.shaping-demo .inline-svg .greyscale-svg #tibetan-abvs}

Application of the abvs feature
:::

```{svg-color-toggle-button} tibetan-abvs
```

The `blws` feature replaces below-base-consonant glyphs with special
presentation forms. In Tibetan, this can include contextual ligatures
involving below-base dependent vowel marks (matras) or subjoined
consonants.

:::{figure-md}
![Application of the blws feature](/images/tibetan/tibetan-blws.svg "Application of the blws feature"){.shaping-demo .inline-svg .greyscale-svg #tibetan-blws}

Application of the blws feature
:::

```{svg-color-toggle-button} tibetan-blws
```

The `calt` feature substitutes glyphs with contextual alternate
forms. In general, this involves replacing the default form of a
stacking glyph (such as a subjoined consonant) with an alternate that
provides a preferable connection to an adjacent glyph in the stack.

The `calt` feature performs substitutions that are not mandatory for
orthographic correctness. The substitutions made by `calt`
can be disabled by application-level user interfaces.

:::{figure-md}
![Application of the calt feature](/images/tibetan/tibetan-calt.svg "Application of the calt feature"){.shaping-demo .inline-svg .greyscale-svg #tibetan-calt}

Application of the calt feature
:::

```{svg-color-toggle-button} tibetan-calt
```


The `liga` feature substitutes standard, optional ligatures that are on
by default. Substitutions made by `liga` may be disabled by
application-level user interfaces.

:::{figure-md}
![Application of the liga feature](/images/tibetan/tibetan-liga.svg "Application of the liga feature"){.shaping-demo .inline-svg .greyscale-svg #tibetan-liga}

Application of the liga feature
:::

```{svg-color-toggle-button} tibetan-liga
```


### Stage 3: Applying remaining positioning features from <abbr>GPOS</abbr> ###

In this stage, mark positioning, kerning, and other <abbr title="Glyph Positioning table">GPOS</abbr> features are
applied. As with the preceding stage, the order in which these
features are applied is not canonical; they should be applied in the
order in which they appear in the <abbr title="Glyph Positioning table">GPOS</abbr> table in the font.

	kern
	abvm
	blwm
	mkmk

> Note: The `kern` feature is usually applied at this stage, if it is
> present in the font. However, `kern` is not mandatory for shaping
> Tibetan text and may be disabled by user preference.

The `kern` feature adjusts the horizontal positioning of
glyphs.

:::{figure-md}
![Application of the kern feature](/images/tibetan/tibetan-kern.svg "Application of the kern feature"){.shaping-demo .inline-svg .greyscale-svg #tibetan-kern}

Application of the kern feature
:::

```{svg-color-toggle-button} tibetan-kern
```

The `abvm` feature positions above-base glyphs for attachment to base
characters. In Tibetan, this includes tone markers, diacritical marks,
and above-base dependent vowels (matras).

:::{figure-md}
![Application of the abvm feature](/images/tibetan/tibetan-abvm.svg "Application of the abvm feature"){.shaping-demo .inline-svg .greyscale-svg #tibetan-abvm}

Application of the abvm feature
:::

```{svg-color-toggle-button} tibetan-abvm
```

The `blwm` feature positions below-base glyphs for attachment to base
characters. In Tibetan, this includes subjoined consonants,
below-base dependent vowels (matras), and diacritical marks.

:::{figure-md}
![Application of the blwm feature](/images/tibetan/tibetan-blwm.svg "Application of the blwm feature"){.shaping-demo .inline-svg .greyscale-svg #tibetan-blwm}

Application of the blwm feature
:::

```{svg-color-toggle-button} tibetan-blwm
```

The `mkmk` feature positions marks with respect to preceding marks,
providing proper positioning for sequences of marks that attach to the
same base glyph. In Tibetan, this also includes attaching marks to
subjoined consonants or dependent vowels.

:::{figure-md}
![Application of the mkmk feature](/images/tibetan/tibetan-mkmk.svg "Application of the mkmk feature"){.shaping-demo .inline-svg .greyscale-svg #tibetan-mkmk}

Application of the mkmk feature
:::

```{svg-color-toggle-button} tibetan-mkmk
```


================================================
FILE: opentype-shaping-use.md
================================================
# Universal Shaping Engine script shaping in OpenType #

This document details the default shaping procedure needed to display
text runs in scripts supported by the Universal Shaping Engine (<abbr>USE</abbr>)
model. 


**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
  - [The <abbr>USE</abbr> shaping model](#the-use-shaping-model)
      - [Stage 1: Split vowel decomposition](#stage-1-split-vowel-decomposition)
      - [Stage 2: Cluster identification](#stage-2-cluster-identification)
      - [Stage 3: Basic cluster formation](#stage-3-basic-cluster-formation)
	      - [Stage 3, step 1: Applying the basic pre-processing features from <abbr>GSUB</abbr>](#stage-3-step-1-applying-the-basic-pre-processing-features-from-gsub)
          - [Stage 3, step 2: Applying the basic reordering features from <abbr>GSUB</abbr>](#stage-3-step-2-applying-the-basic-reordering-features-from-gsub)
          - [Stage 3, step 3: Applying the basic orthographic features from <abbr>GSUB</abbr>](#stage-3-step-3-applying-the-basic-orthographic-features-from-gsub)
	  - [Stage 4: Glyph reordering](#stage-4-glyph-reordering)
	      - [Stage 4, step 1: Applying the reordering features from <abbr>GSUB</abbr>](#stage-4-step-1-applying-the-reordering-features-from-gsub)
	      - [Stage 4, step 2: Performing property-based reordering moves](#stage-4-step-2-performing-property-based-reordering-moves)
	  - [Stage 5: Final feature application](#stage-5-final-feature-application)
	      - [Stage 5, step 1: Applying the final topographic features from <abbr>GSUB</abbr>](#stage-5-step-1-applying-the-final-topographic-features-from-gsub)
	      - [Stage 5, step 2: Applying the final typographic-presentation features from <abbr>GSUB</abbr>](#stage-5-step-2-applying-the-final-typographic-presentation-features-from-gsub)
	      - [Stage 5, step 3: Applying the final positioning features from <abbr>GPOS</abbr>](#stage-5-step-3-applying-the-final-positioning-features-from-gpos)
  
  
## General information ##

The Universal Shaping Engine (<abbr>USE</abbr>) model is used for complex scripts
that are not already supported by a dedicated OpenType shaping
model. 

"Complex" scripts, in OpenType shaping terminology, are scripts that
require some combination of glyph reordering, contextual joining
behavior, or the substitution of context-dependent forms for
linguistic or orthographic correctness.

The scripts covered by this model include Javanese, Balinese,
Buginese, Batak, Chakma, Lepcha, Modi, Phags-pa, Tagalog, Siddham,
Sundanese, Tai Le, Tai Tham, Tai Viet, and many others.

In many ways, the <abbr title="Universal Shaping Engine">USE</abbr> model is a generalization of the
[Indic2](opentype-shaping-indic-general.md) OpenType 
shaping model, with adjustments made to correct shortfalls encountered
when using the Indic2 shaping model, as well as additional changes
designed to broaden the number of scripts that can be supported. For
example, the <abbr title="Universal Shaping Engine">USE</abbr> model includes a step applying contextual
joining-behavior features as is performed in the Arabic-like shaping
model. 

> Note: The term _Indic3_ is sometimes used in comparison to Indic2
> (or the corresponding increment of the script tags for existing
> OpenType shaping models, such as `<dev3>` in comparison to
> `<dev2>`).
>
> This terminology either indicates that a shaping engine has
> implemented support for one or more of the Indic2 scripts within the
> <abbr title="Universal Shaping Engine">USE</abbr> model or it is merely a conversational convention to discuss
> support for the Indic2-model scripts in <abbr title="Universal Shaping Engine">USE</abbr>.
>
> At the present time, there is no formal definition for an Indic3
> model, and there are no registered OpenType script tags for
> `<dev3>` or any other third generation of the scripts handled by the
> Indic2 model.

<abbr title="Universal Shaping Engine">USE</abbr> was introduced after the release of version 8.0 of the Unicode
specification. The intent is for <abbr title="Universal Shaping Engine">USE</abbr> to support complex scripts added
to future Unicode releases in addition to those already supported.


## Terminology ##

The <abbr title="Universal Shaping Engine">USE</abbr> shaping model uses a standard set of terms for the features of
supported scripts. These terms are similar to the standard terms used
for Indic scripts, but with several key distinctions.

A **cluster** is the fundamental unit used in shaping; it consists of
a sequence of Unicode codepoints that will be processed as an atomic
unit. An individual syllable typically corresponds to a single
cluster, but any particular cluster might involve multiple syllables
or a sequence that does not match the syllable-formation rules of the
script.

A **base** character in the <abbr title="Universal Shaping Engine">USE</abbr> model may be a consonant, an
independent vowel, a number, or any of several additional character
classes.

A cluster's base consonant is generally rendered in its full form
(although it may form ligatures), while other consonants in the
cluster frequently take on secondary forms. Different <abbr title="Glyph Substitution table">GSUB</abbr>
substitutions may apply to a script's **pre-base** and **post-base**
consonants. Some of these substitutions create **above-base** or
**below-base** forms. The **Reph** form of the consonant "Ra" is an
example.

A **vowel** character in the <abbr title="Universal Shaping Engine">USE</abbr> model is a dependent vowel or any of
several additional marks with similar behavior. This class is similar
to the "matra" class used in Indic shaping.

**Halant** is the standard term for a "vowel-killer" sign.


## Glyph classification ##

The <abbr title="Universal Shaping Engine">USE</abbr> shaping model classifies characters based on a specific set of
properties defined for each codepoint in the Unicode Character
Database (<abbr>UCD</abbr>), augmented with a small set of pre-defined property
overrides.

The <abbr title="Unicode Character Database">UCD</abbr> properties used for <abbr title="Universal Shaping Engine">USE</abbr> character classification are:

	Unicode General Category (UGC)
	Unicode Indic Syllabic Category (UISC)
	Unicode Indic Positional Category (UIPC)

In addition, the Unicode Character Decomposition Mapping (<abbr>UCDM</abbr>) is used for
all split vowels.


### <abbr>USE</abbr> overrides ###

Although, in general, the <abbr title="Universal Shaping Engine">USE</abbr> shaping model relies on the <abbr title="Unicode General Category">UGC</abbr>, <abbr title="Unicode Indic Syllabic Category">UISC</abbr>,
and <abbr title="Unicode Indic Positional Category">UIPC</abbr> properties, the <abbr title="Universal Shaping Engine">USE</abbr> model makes a small set of standardized
overrides to the properties of certain specific characters.

The following table lists the complete set of <abbr title="Universal Shaping Engine">USE</abbr> overrides. Shaping
engines should implement the override properties in order to guarantee
correct results.

> Note: A _null_ in the following table indicates that the
> corresponding Unicode property is not overridden for the codepoint
> featured in that row. 


:::{table} Property overrides for <abbr>USE</abbr> shaping


| Codepoint | Unicode UISC               | USE override UISC | Unicode UIPC | USE override UIPC | Glyph                                   |
|:----------|:---------------------------|:------------------|:-------------|:------------------|:----------------------------------------|
| `U+AA29`  | Vowel_Dependent            | Bindu             | _null_       | _null_            | &#xAA29; Cham Vowel Sign Aa             |
| `U+0F71`  | Vowel_Dependent            | Nukta             | _null_       | _null_            | &#x0F71; Tibetan Vowel Sign Aa          |
| `U+A982`  | Consonant_Succeeding_Repha | Tone_Mark         | _null_       | _null_            | &#xA982; Javanese Sign Layar            |
| `U+0F7F`  | Visarga                    | Consonant_Dead    | _null_       | _null_            | &#x0F7F; Tibetan Sign Rnam Bcad         |
| `U+11134` | Pure_Killer                | Gemination_Mark   | _null_       | _null_            | &#x11134; Chakma Maayyaa                |
| `U+0F74`  | _null_                     | _null_            | Bottom       | Top               | &#x0F74; Tibetan Vowel Sign U           |
| `U+AA35`  | _null_                     | _null_            | Bottom       | Top               | &#xAA35; Cham Consonant Sign La         |
| `U+1A18`  | _null_                     | _null_            | Bottom       | Top               | &#x1A18; Buginese Vowel Sign U          |
| `U+0F72`  | _null_                     | _null_            | Top          | Bottom            | &#x0F72; Tibetan Vowel Sign I           |
| `U+0F7A`  | _null_                     | _null_            | Top          | Bottom            | &#x0F7A; Tibetan Vowel Sign E           |
| `U+0F7B`  | _null_                     | _null_            | Top          | Bottom            | &#x0F7B; Tibetan Vowel Sign Ee          |
| `U+0F7C`  | _null_                     | _null_            | Top          | Bottom            | &#x0F7C; Tibetan Vowel Sign O           |
| `U+0F7D`  | _null_                     | _null_            | Top          | Bottom            | &#x0F7D; Tibetan Vowel Sign Oo          |
| `U+0F80`  | _null_                     | _null_            | Top          | Bottom            | &#x0F80; Tibetan Vowel Sign Reversed I  |
| `U+11127` | _null_                     | _null_            | Top          | Bottom            | &#x11127; Chakma Vowel Sign A           |
| `U+11128` | _null_                     | _null_            | Top          | Bottom            | &#x11128; Chakma Vowel Sign I           |
| `U+11129` | _null_                     | _null_            | Top          | Bottom            | &#x11129; Chakma Vowel Sign Ii          |
| `U+1112D` | _null_                     | _null_            | Top          | Bottom            | &#x1112d; Chakma Vowel Sign Ai          |
| `U+11130` | _null_                     | _null_            | Top          | Bottom            | &#x11130; Chakma Vowel Sign Oi          |
| | | | | | |
:::


### <abbr>USE</abbr> classification table ###

The following table lists the classes utilized in the <abbr title="Universal Shaping Engine">USE</abbr> shaping
model, along with a definition for each class. The class definitions
refer to the <abbr title="Unicode General Category">UGC</abbr>, <abbr title="Unicode Indic Syllabic Category">UISC</abbr>, and <abbr title="Unicode Indic Positional Category">UIPC</abbr> categories in the Unicode standard,
or to specific Unicode codepoints.

The symbols given in the "Symbol" column for each class may be used to
express cluster-matching rules or other algorithms.

Vowels and modifiers may be further subclassified as described in the
[<abbr title="Universal Shaping Engine">USE</abbr> subclasses table](#use-subclasses-table) below.


:::{table} Class definitions for <abbr>USE</abbr> shaping

| USE classification        | Symbol | Definition                                                                                                    |
|:--------------------------|:-------|:--------------------------------------------------------------------------------------------------------------|
| BASE                      | `B`    | UISC = Number _or_ (UISC = Avagraha & UGC = Lo) _or_ (UISC = Bindu & UGC = Lo) _or_ UISC = Consonant _or_ (UISC = Consonant_Final & UGC = Lo) _or_ UISC = Consonant_Head_Letter _or_ (UISC = Consonant_Medial & UGC = Lo) _or_ (UISC = Consonant_Subjoined & UGC = Lo) _or_ UISC = Tone_Letter _or_ (UISC = Vowel & UGC = Lo) _or_ UISC = Vowel_Independent _or_ (UISC = Vowel_Dependent & UGC = Lo) |
| Combining grapheme joiner | `CGJ`  | `U+034F`                                                                                                      |
| CONS_MOD                  | `CM`   | UISC = Nukta _or_ Gemination_Mark _or_ Consonant_Killer                                                       |
| CONS_WITH_STACKER         | `CS`   | UISC = Consonant_With_Stacker                                                                                 |
| CONS_FINAL                | `F`    | (UISC = Consonant_Final & UGC != Lo) _or_ UISC = Consonant_Succeeding_Repha                                   |
| CONS_FINAL_MOD            | `FM`   | UISC = Syllable_Modifier                                                                                      |
| BASE_OTHER                | `GB`   | UISC = Consonant_Placeholder _or_ `U+2015`, `U+2022`, `U+25FB`–`U+25FE`                                       |
| HALANT                    | `H`    | UISC = Virama _or_ Invisible_Stacker                                                                          |
| HALANT_NUM                | `HN`   | UISC = Number_Joiner                                                                                          |
| BASE_IND                  | `IND`  | (UISC = Consonant_Dead _or_ Modifying_Letter) _or_ (UGC = Po != `U+104E`, `U+2022`) _or_ `U+002D`             |
| CONS_MED                  | `M`    | UISC = Consonant_Medial & UGC != Lo                                                                           |
| BASE_NUM                  | `N`    | UISC = Brahmi_Joining_Number                                                                                  |
| OTHER                     | `O`    | Any other SCRIPT_COMMON characters; White space characters, UGC=Zs                                            |
| REPHA                     | `R`    | UISC = Consonant_Preceding_Repha _or_ Consonant_Prefixed                                                      |
| Reserved character        | `Rsv`  | Any character not currently assigned or otherwise reserved in Unicode                                         |
| SYM                       | `S`    | UGC = Sc _or_ (UGC = So & != `U+25CC`)                                                                        |
| SYM_MOD                   | `SM`   | `U+1B6B`, `U+1B6C`, `U+1B6D`, `U+1B6E`, `U+1B6F`, `U+1B70`, `U+1B71`, `U+1B72`, `U+1B73`                      |
| CONS_SUB                  | `SUB`  | UISC = Consonant_Subjoined & UGC != Lo                                                                        |
| VOWEL                     | `V`    | (UISC = Vowel & UGC != Lo) _or_ (UISC = Vowel_Dependent & UGC != Lo) _or_ UISC = Pure_Killer                  |
| VOWEL_MOD                 | `VM`   | (UISC = Bindu & UGC != Lo) _or_ UISC = Tone_Mark _or_ Cantillation_Mark _or_ Register_Shifter _or_ Visarga    |
| VARIATION_SELECTOR        | `VS`   | `U+FE00`–`U+FE0F`                                                                                             |
| Word joiner               | `WJ`   | `U+2060`                                                                                                      |
| Zero width joiner         | `ZWJ`  | UISC = Joiner                                                                                                 |
| Zero width nonjoiner      | `ZWNJ` | UISC = Non_Joiner                                                                                             |
| | | |
:::


### <abbr>USE</abbr> subclasses table ###

Vowels and modifiers may be further subclassified based on their
position relative to base characters. The subclasses incorporated in
the <abbr title="Universal Shaping Engine">USE</abbr> shaping model are defined in the table below.

Split-vowel subclasses are not assigned a symbol because each split
vowel must be decomposed into its components.


:::{table} Subclasses for <abbr>USE</abbr> shaping

| USE classification     | Symbol  | Definition                                                              |
|:-----------------------|:--------|:------------------------------------------------------------------------|
| CONS_MOD_ABOVE         | `CMAbv` | USE=CM & UIPC = Top                                                     |
| CONS_MOD_BELOW         | `CMBlw` | USE=CM & UIPC = Bottom                                                  |
| CONS_FINAL_ABOVE       | `FAbv`  | USE=F & UIPC = Top                                                      |
| CONS_FINAL_BELOW       | `FBlw`  | USE=F & UIPC = Bottom                                                   |
| CONS_FINAL_POST        | `FPst`  | USE=F & UIPC = Right                                                    |
| CONS_MED_ABOVE         | `MAbv`  | USE=M & UIPC = Top                                                      |
| CONS_MED_BELOW         | `MBlw`  | USE=M & UIPC = Bottom                                                   |
| CONS_MED_PRE           | `MPre`  | USE=M & UIPC = Left                                                     |
| CONS_MED_POST          | `MPst`  | USE=M & UIPC = Right                                                    |
| SYM_MOD_ABOVE          | `SMAbv` | `U+1B6B`,`U+1B6D`,`U+1B6E`,`U+1B6F`,`U+1B70`,`U+1B71`,`U+1B72`,`U+1B73` |
| SYM_MOD_BELOW          | `SMBlw` | `U+1B6C`                                                                |
| VOWEL_ABOVE            | `VAbv`  | USE=V & UIPC = Top                                                      |
| VOWEL_ABOVE_BELOW      | _null_  | USE=V & UIPC = Top_And_Bottom                                           |
| VOWEL_ABOVE_BELOW_POST | _null_  | USE=V & UIPC = Top_And_Bottom_And_Right                                 |
| VOWEL_ABOVE_POST       | _null_  | USE=V & UIPC = Top_And_Right                                            |
| VOWEL_BELOW            | `VBlw`  | USE=V & UIPC = Bottom _or_ Overstruck                                   |
| VOWEL_BELOW_POST       | _null_  | USE=V & UIPC = Bottom_And_Right                                         |
| VOWEL_PRE              | `VPre`  | USE=V & UIPC = Left                                                     |
| VOWEL_PRE_ABOVE        | _null_  | USE=V & UIPC = Top_And_Left                                             |
| VOWEL_PRE_ABOVE_POST   | _null_  | USE=V & UIPC = Top_And_Left_And_Right                                   |
| VOWEL_PRE_POST         | _null_  | USE=V & UIPC = Left_And_Right                                           |
| VOWEL_POST             | `VPst`  | USE=V & UIPC = Right                                                    |
| VOWEL_MOD_ABOVE        | `VMAbv` | USE=VM & UIPC = Top                                                     |
| VOWEL_MOD_BELOW        | `VMBlw` | USE=VM & UIPC = Bottom _or_ Overstruck                                  |
| VOWEL_MOD_PRE          | `VMPre` | USE=VM & UIPC = Left                                                    |
| VOWEL_MOD_POST         | `VMPst` | USE=VM & UIPC = Right                                                   |
| | | |
:::


## The <abbr>USE</abbr> shaping model ##

The <abbr title="Universal Shaping Engine">USE</abbr> shaping model consists of five top-level stages.

1. Decomposition of split vowels
2. Identifying clusters
3. Applying basic cluster formation features
4. Glyph reordering
5. Applying final features

All scripts supported by the <abbr title="Universal Shaping Engine">USE</abbr> model will be processed in this same
pattern. However, not every script requires that actions be taken in
every operation.

The first two stages take place for the entire text run being
shaped. Subsequently, stages 3, 4, and 5 are each conducted in order on a
per-cluster basis, until every cluster in the run has been processed.

The substitution features from <abbr title="Glyph Substitution table">GSUB</abbr> and the positioning features from
<abbr title="Glyph Positioning table">GPOS</abbr> are applied to the text run in predefined feature groups. The
features that are applied at each step in the process are described below.


### Stage 1: Split vowel decomposition ###

Most split vowels have a canonical decomposition defined in the
Unicode specification. The <abbr title="Universal Shaping Engine">USE</abbr> shaping model requires that all such
split vowels be decomposed into their components before any further
processing is performed. 

For these vowels, the canonical decomposition must be performed prior
to cluster identification. Because this decomposition is a
character-level operation, the shaping engine may choose to perform it
earlier, such as during an initial Unicode-normalization stage. 

For any split vowels that do not have a canonical decomposition, the
active font should provide a decomposition via the `ccmp` substitution
feature in <abbr title="Glyph Substitution table">GSUB</abbr>. 

The cluster-identification rules detailed in stage two are based on
the canonical decompositions, and do not take non-canonical <abbr title="Glyph Substitution table">GSUB</abbr>
decomposition into account.


### Stage 2: Cluster identification ###

A cluster in the <abbr title="Universal Shaping Engine">USE</abbr> model is defined according to a generalized,
visual pattern that is common to all supported scripts. Consequently,
the cluster-identification expressions used do not enforce linguistic
or orthographic correctness.

An independent cluster will consist of a standalone codepoint that
does not require further shaping, optionally followed by a variation
selector. Independent clusters will match the expression:
```markdown
(IND | O | Rsv | WJ) VS?
```

A standard cluster features a required base character and may include
many optional elements. Standard clusters will match the expression:
```markdown
( R | CS )? ( B | GB ) VS? CMAbv* CMBlw* ( ((H B) | SUB) VS? CMAbv* CMBlw* )* MPre? MAbv? MBlw? MPst? VPre* VAbv* VBlw* VPst* VMPre* VMAbv* VMBlw* VMPst* FAbv* FBlw* FPst* FM?
```

A halant-terminated cluster occurs when any character other than a `B`
follows a `H`. Halant-terminated clusters will match the expression:
```markdown
( R | CS )? (B | GB) VS? CMAbv* CMBlw* ( ((H B) | SUB) VS? CMAbv* CMBlw*)* H
```

A number-joiner–terminated cluster will match the expression:
```markdown
N VS? (HN N VS?)* HN
```

A numeral cluster will match the expression:
```markdown
N VS? (HN N VS?)*
```

A symbol cluster will match the expression:
```markdown
(S | GB) VS? SMAbv* SMBlw*
```

> Note: Practically speaking, shaping engines are highly unlikely to
> encounter more than a small number of sequential vowels or modifiers
> in any real-world clusters. Thus, implementations may choose to
> limit occurrences by limiting some of the above expressions to a
> finite length, such as `VPre{0,4}` rather than `VPre*`.

The expressions above use state-machine syntax from the Ragel
state-machine compiler. The operators represent:

```markdown
a* = zero or more copies of a
b+ = one or more copies of b
c? = optional instance of c
d{n} = exactly n copies of d
d{,n} = zero to n copies of d
d{n,} = n or more copies of d
d{n,m} = n to m copies of d
!e = not e
^f = character-level not f
g.h = concatenation of g and h
i|j = i or j
( ) = grouping of expression elements
```

Sequences not matching any of the above expressions should be regarded
as broken. The shaping engine may make a best-effort attempt
to shape the broken sequence, but making guarantees about the
correctness or appearance of the final result is out of scope for this
document.

After the clusters have been identified, each of the subsequent 
shaping stages occurs on a per-cluster basis.


### Stage 3: Basic cluster formation ###

The basic cluster formation stage is used to apply fundamental
substitutions necessary for script and language correctness.

#### Stage 3, step 1: Applying the basic pre-processing features from <abbr title="Glyph Substitution table">GSUB</abbr> ####

The basic pre-processing step applies mandatory substitution features
using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this 
stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features. 

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	locl
	ccmp
	nukt
	akhn

The `locl` feature replaces default glyphs with any language-specific
variants, based on examining the language setting of the text run.

> Note: Strictly speaking, the use of localized-form substitutions is
> not part of the shaping process, but of the localization process,
> and could take place at an earlier point while handling the text
> run. However, shaping engines are expected to complete the
> application of the `locl` feature before applying the subsequent
> <abbr title="Glyph Substitution table">GSUB</abbr> substitutions in the following steps.

The `ccmp` feature allows a font to substitute mark-and-base sequences
with a pre-composed glyph including the mark and the base, or to
substitute a single glyph into an equivalent decomposed sequence of
glyphs. 

If present, these composition and decomposition substitutions must be
performed before applying any other <abbr title="Glyph Substitution table">GSUB</abbr> lookups, because
those lookups may be written to match only the `ccmp`-substituted
glyphs.

> Note: The `ccmp` feature may perform decompositions of split vowels
> that do not have a canonical decomposition defined in Unicode. Split
> vowels that do have a canonical decomposition were decomposed in
> stage one.

The `nukt` feature replaces <samp>"_Consonant_,Nukta"</samp> sequences with a
precomposed nukta-variant of the consonant glyph. 

The `akhn` feature replaces specific sequences with required
ligatures. These sequences can occur anywhere in a cluster. 
Akhand characters have orthographic status equivalent to full
consonants in some languages, and fonts may have later substitution
rules designed to match them in subsequences. Therefore, this
feature must be applied before all other many-to-one substitutions.


#### Stage 3, step 2: Applying the basic reordering features from <abbr title="Glyph Substitution table">GSUB</abbr> ####

The basic reordering step applies mandatory substitution features from
<abbr title="Glyph Substitution table">GSUB</abbr> that affect reordering elements.

For these features, the glyph substitutions themselves are applied at this
step. However, the actual reordering of the glyphs does not take place
until stage 4, step 1.

The order in which these substitutions must be performed is fixed for
all <abbr title="Universal Shaping Engine">USE</abbr> scripts:

	rphf
	pref

##### Stage 3, step 2.1: rphf #####

The `rphf` feature replaces cluster-initial <samp>"Ra,Halant"</samp> sequences with
the <samp>"Reph"</samp> glyph.

> Note: although the glyph substitution is performed in this step, the
> corresponding glyph reordering move is not performed until a later
> stage. 

##### Stage 3, step 2.2: pref #####

The `pref` feature replaces pre-base-consonant glyphs with any special
forms. 

> Note: although the glyph substitution is performed in this step, the
> corresponding glyph reordering move is not performed until a later
> stage. 


#### Stage 3, step 3: Applying the basic orthographic features from <abbr title="Glyph Substitution table">GSUB</abbr> ####

The basic orthographic step applies substitution features using the
rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this stage, glyph
sequences should be tagged for possible application of <abbr title="Glyph Substitution table">GSUB</abbr> features. 

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	rkrf
	abvf
	blwf
	half
	pstf
	vatu
	cjct

The `rkrf` feature replaces <samp>"_Consonant_,Halant,Ra"</samp> sequences with the
"Rakaar"-ligature form of the consonant glyph.

The `abvf` feature replaces above-base-consonant glyphs with any
special forms. 

The `blwf` feature replaces below-base-consonant glyphs with any
special forms.

The `half` feature replaces <samp>"_Consonant_,Halant"</samp> sequences before the
base consonant with "half forms" of the consonant glyphs.

The `pstf` feature replaces post-base-consonant glyphs with any
special forms.

The `vatu` feature replaces certain sequences with "Vattu variant"
forms. 

The `cjct` feature replaces sequences of adjacent consonants with
conjunct ligatures. These sequences must match <samp>"_Consonant_,Halant,_Consonant_"</samp>.


### Stage 4: Glyph reordering ###

The glyph-reordering stage moves dependent vowels, diacritics, and
other mark glyphs in relation to the base consonant. All reordering is
performed in this stage, which is broken into two distinct steps:

1. Applying the reordering features from <abbr title="Glyph Substitution table">GSUB</abbr>
2. Performing property-based reordering moves


#### Stage 4, step 1: Applying the reordering features from <abbr title="Glyph Substitution table">GSUB</abbr> ####

In this step, the reordering moves corresponding to the
glyph-reordering features in <abbr title="Glyph Substitution table">GSUB</abbr> are performed.

Any glyph substitutions that apply to characters involved in these
reordering moves were performed in stage 3, step 2. Therefore, this
step only requires moving glyphs to their final positions.

The order in which these reordering moves must be performed is fixed for
all <abbr title="Universal Shaping Engine">USE</abbr> scripts:

	rphf
	pref

##### Stage 4, step 1.1: rphf #####

In stage 3, step 2, the `rphf` feature replaced cluster-initial
<samp>"Ra,Halant"</samp> sequences with the <samp>"Reph"</samp> glyph. The <samp>"Reph"</samp> glyph is now
reordered to its final position. The algorithm to determine the final
position of the <samp>"Reph"</samp> glyph is:

  - Move the <samp>"Reph"</samp> right one position at a time.
    - If the character immediately following the new position is an
      explicit <samp>"Halant"</samp>, stop.
    - If the character immediately before the new position is a full
      base (`B`) character, stop.
    - If the end of the cluster is reached, stop.

##### Stage 4, step 1.2: pref #####

In stage 3, step 2, the `pref` feature replaced pre-base-consonant
glyphs with special forms. The pre-base-consonant glyph is now
reordered to its final position. The algorithm to determine the final
position of the pre-base-reordering consonant is:

  - Move the pre-base-reordering consonant left one position at a
    time.
    - If the pre-base-reordering consonant is to the left of the
      first spacing glyph after an explicit <samp>"Halant"</samp>, stop.
    - If the pre-base-reordering consonant is to the left of the
      first spacing glyph in the cluster, stop.
    - If the beginning of the cluster is reached, stop.
	
> Note: Each cluster may have only one pre-base-reordering consonant
> glyph. 
>
> Note: scripts that use pre-base medial consonants may also make use
> of the `pref` feature reordering.


#### Stage 4, step 2: Performing property-based reordering moves ####

In this step, any characters that match one of the <abbr title="Universal Shaping Engine">USE</abbr> reordering
classifications should be reordered into their final position. 

> Note: this classification-based reordering step ensures that
> reordering characters not addressed by the active font's <abbr title="Glyph Substitution table">GSUB</abbr>
> features are ordered correctly.

The character classes reordered in this step are:

```markdown
`R`		= `REPHA`
`VPre`		= `VOWEL_PRE`
`VMPre`		= `VOWEL_MOD_PRE`
```

Pre-base `REPHA` glyphs that occur before a full base are reordered
using the <samp>"Reph"</samp> reordering algorithm described in [Stage 4, step 1.1](#stage-4-step-11-rphf),
just as if the `rphf` feature had been applied to the glyph.

Pre-base `VOWEL_PRE` vowel glyphs, including both stand-alone `VOWEL_PRE` vowels
and `VOWEL_PRE` components of split vowels, are reordered to
   - before the base glyph
   - before any other pre-base glyphs that were reordered in earlier steps
   
Pre-base `VOWEL_MOD_PRE` vowel-modifier glyphs are reordered to
   - before the base glyph
   - before any pre-base `VOWEL_PRE` vowel glyphs
   - before any other pre-base glyphs that were reordered in earlier steps


### Stage 5: Final feature application ###

The final stage involves applying topographic joining features for
connected scripts, applying typographic-presentation features from
<abbr title="Glyph Substitution table">GSUB</abbr>, and applying positioning features from <abbr title="Glyph Positioning table">GPOS</abbr>.


#### Stage 5, step 1: Applying the final topographic features from <abbr title="Glyph Substitution table">GSUB</abbr> ####

For connected scripts, this step applies the substitutions to select
the correct topographic form for each glyph, based on its position in
the syllable.

Whether or not each codepoint joins on the left or the right side is
determined by the `Unicode Joining Type` (<abbr>UJT</abbr>) property defined in <abbr title="Unicode Character Database">UCD</abbr>
for each codepoint.

> Note: <abbr title="Universal Shaping Engine">USE</abbr> does not support positional typographic features for any
> non-connected scripts.
	
	isol
	init
	medi
	fina

#### Stage 5, step 2: Applying the final typographic-presentation features from <abbr title="Glyph Substitution table">GSUB</abbr> ####

The final typographic-presentation step applies mandatory substitution
features using the rules in the font's <abbr title="Glyph Substitution table">GSUB</abbr> table. In preparation for this
stage, glyph sequences should be tagged for possible application 
of <abbr title="Glyph Substitution table">GSUB</abbr> features.

The order in which these features are applied is not canonical; they
should be applied in the order in which they appear in the <abbr title="Glyph Substitution table">GSUB</abbr> table
in the font.

	abvs
	blws
	calt
	clig
	haln
	liga
	pres
	psts
	rclt
	rlig
	vert
	vrt2
	
The `abvs` feature replaces above-base-consonant glyphs with special
presentation forms. This usually includes contextual variants of
above-base marks or contextually appropriate mark-and-base ligatures.

The `blws` feature replaces below-base-consonant glyphs with special
presentation forms. This usually includes replacing consonants that
are adjacent to special consonant forms with contextual
ligatures.

The `calt` feature substitutes glyphs with contextual alternate
forms. In contrast to `rclt`, the `calt` feature performs
substitutions that are not mandatory for orthographic
correctness. Consequently, and also unlike `rclt`, the substitutions
made by `calt` can be disabled by application-level user interfaces.

The `clig` feature substitutes optional ligatures that are on by
default, but which are activated only in certain
contexts. Substitutions made by `clig` may be disabled by
application-level user interfaces. 

The `haln` feature replaces syllable-final <samp>"_Consonant_,Halant"</samp> pairs with
special presentation forms. This can include stylistic variants of the
consonant where placing the <samp>"Halant"</samp> mark on its own is
typographically problematic. 

The `liga` feature substitutes standard, optional ligatures that are on
by default. Substitutions made by `liga` may be disabled by
application-level user interfaces.

The `pres` feature replaces pre-base-consonant glyphs with special
presentation forms. This can include consonant conjuncts, half-form
consonants, and stylistic variants of left-side dependent vowels
(matras). 

The `psts` feature replaces post-base-consonant glyphs with special
presentation forms. This usually includes replacing right-side
dependent vowels (matras) with stylistic variants or replacing
post-base-consonant/matra pairs with contextual ligatures. 

The `rclt` feature substitutes glyphs with contextual alternate
forms. The `rclt` feature should be used to perform such substitutions
that are required by the orthography of the active script and
language. Substitutions made by `rclt` cannot be disabled by 
application-level user interfaces.

The `rlig` feature substitutes glyph sequences with mandatory
ligatures. Substitutions made by `rlig` cannot be disabled by
application-level user interfaces.


#### Stage 5, step 3: Applying the final positioning features from <abbr>GPOS</abbr> ####

	curs
	dist
	kern
	mark
	abvm
	blwm
	mkmk
	
The `curs` feature performs cursive positioning in connected scripts or
cursive styles. Each cursive glyph has an entry point and exit point;
the `curs` feature positions glyphs so that the entry point of the
current glyph meets the exit point of the preceding glyph.

The `dist` feature adjusts the horizontal positioning of
glyphs. Unlike `kern`, adjustments made with `dist` do not require the
application or the user to enable any software _kerning_ features, if
such features are optional. 

The `kern` feature adjusts glyph spacing between pairs of adjacent glyphs.

The `mark` feature positions marks with respect to base glyphs.

The `abvm` feature positions above-base marks for attachment to base
characters. This includes above-base dependent vowels (matras),
diacritical marks, syllable modifiers, and above-base consonant forms. 

The `blwm` feature positions below-base marks for attachment to base
characters. This includes below-base dependent vowels (matras),
diacritical marks, syllable modifiers, and below-base consonant forms.

The `mkmk` feature positions marks with respect to preceding marks,
providing proper positioning for sequences of marks that attach to the
same base glyph.


================================================
FILE: opentype-shaping-vedic-extensions.md
================================================
# Vedic Extensions in OpenType #

This document outlines the shaping information needed to display
characters from the Unicode Vedic Extensions block, which may be used
within text runs in many Indic scripts.

**Contents**

  - [General information](#general-information)
  - [Terminology](#terminology)
  - [Glyph classification](#glyph-classification)
      - [Vedic Extensions character table](#vedic-extensions-character-table)
  - [Shaping information](#shaping-information)


## General information ##

The Vedic Extensions block encodes letters and marks that are used in
a large body of ancient literature written in the Vedic Sanskrit
language.

Primarily an oral language in the time period when the key literature
originated, Vedic Sanskrit has no native script. Therefore, texts may
be typeset in any one of the Indic scripts, using the Vedic Extensions
to supplement the main script's character set.

## Terminology ##

Individual Vedic Extension characters may be named by a combination of
the Vedic text in which the mark is used, the regional or manuscript
tradition involved, or a simple visual or phonetic description of the
character. Some commonly used general categories are worth noting.

**Udatta** is the term for a high tone on a vowel.

**Anudatta** is the term for a low tone on a vowel.

**Svarita** is the term for a falling or mixed tone on a vowel.

**Anusvara** is the term for a nasalization sound that precedes a consonant.

**Visarga** is the term for a soft breathing sound that precedes a vowel.

> Note: In modern Indic languages, the terms _anusvara_ and _visarga_
> often refer to diacritical marks that have the above effects on
> pronunciation. In the Vedic Sanskrit language, however, they are
> generally considered independent letters.

## Glyph classification ##

For most codepoints, the `General Category` property defined in the Unicode
standard is correct, but it is not sufficient to fully capture the
expected shaping behavior (such as how the character is treated during
glyph reordering). Therefore, they must additionally be classified by
how they are treated when shaping a run of text.


### Vedic Extensions character table ###


Vedic Extension glyphs should be classified as in the following
table. Codepoints with no assigned meaning are
marked as _unassigned_ in the _Unicode category_ column. 

Assigned codepoints marked with a _null_ in the _Shaping class_
column evoke no special behavior from the shaping engine. 

The _Mark-placement subclass_ column indicates mark-placement
positioning. Assigned codepoints marked with a
_null_ in this column evoke no special mark-placement behavior. Marks
tagged with [Mn] in the _Unicode category_ column are categorized as
non-spacing; marks tagged with [Mc] are categorized as
spacing-combining.

Some codepoints in the following table use a _Shaping class_ that
differs from the codepoint's Unicode _General Category_. The _Shaping
class_ takes precedence during OpenType shaping, as it captures more
specific behavior.


:::{table} Vedic Extensions character table

| Codepoint | Unicode category | Shaping class     | Mark-placement subclass    | Glyph                        |
|:----------|:-----------------|:------------------|:---------------------------|:-----------------------------|
|`U+1CD0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD0; Tone Karshana       |
|`U+1CD1`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD1; Tone Shara          |
|`U+1CD2`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CD2; Tone Prenkha        |
|`U+1CD3`   | Punctuation      | _null_            | _null_                     | &#x1CD3; Sign Nihshvasa      |
|`U+1CD4`   | Mark [Mn]        | CANTILLATION      | OVERSTRUCK                 | &#x1CD4; Tone Midline Svarita |
|`U+1CD5`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD5; Tone Aggravated Independent Svarita |
|`U+1CD6`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD6; Tone Independent Svarita |
|`U+1CD7`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD7; Tone Kathaka Independent Svarita |
|`U+1CD8`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD8; Tone Candra Below   |
|`U+1CD9`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CD9; Tone Kathaka Independent Svarita Schroeder |
|`U+1CDA`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDA; Tone Double Svarita |
|`U+1CDB`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CDB; Tone Triple Svarita |
|`U+1CDC`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDC; Tone Kathaka Anudatta |
|`U+1CDD`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDD; Tone Dot Below      |
|`U+1CDE`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDE; Tone Two Dots Below |
|`U+1CDF`   | Mark [Mn]        | CANTILLATION      | BOTTOM_POSITION            | &#x1CDF; Tone Three Dots Below |
| | | | | |
|`U+1CE0`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CE0; Tone Rigvedic Kashmiri Independent Svarita |
|`U+1CE1`   | Mark [Mc]        | CANTILLATION      | RIGHT_POSITION             | &#x1CE1; Tone Atharvavedic Independent Svarita |
|`U+1CE2`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE2; Sign Visarga Svarita |
|`U+1CE3`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE3; Sign Visarga Udatta |
|`U+1CE4`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE4; Sign Reversed Visarga Udatta |
|`U+1CE5`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE5; Sign Visarga Anudatta |
|`U+1CE6`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE6; Sign Reversed Visarga Anudatta |
|`U+1CE7`   | Mark [Mn]        | _null_            | OVERSTRUCK                 | &#x1CE7; Sign Visarga Udatta With Tail |
|`U+1CE8`   | Mark [Mn]        | AVAGRAHA          | OVERSTRUCK                 | &#x1CE8; Sign Visarga Anudatta With Tail |
|`U+1CE9`   | Letter           | AVAGRAHA          | _null_                     | &#x1CE9; Sign Anusvara Antargomukha |
|`U+1CEA`   | Letter           | _null_            | _null_                     | &#x1CEA; Sign Anusvara Bahirgomukha |
|`U+1CEB`   | Letter           | _null_            | _null_                     | &#x1CEB; Sign Anusvara Vamagomukha |
|`U+1CEC`   | Letter           | AVAGRAHA          | _null_                     | &#x1CEC; Sign Anusvara Vamagomukha With Tail |
|`U+1CED`   | Mark [Mn]        | AVAGRAHA          | BOTTOM_POSITION            | &#x1CED; Sign Tiryak         |
|`U+1CEE`   | Letter           | AVAGRAHA          | _null_                     | &#x1CEE; Sign Hexiform Long Anusvara |
|`U+1CEF`   | Letter           | _null_            | _null_                     | &#x1CEF; Sign Long Anusvara  |
| | | | |																		
|`U+1CF0`   | Letter           | _null_            | _null_                     | &#x1CF0; Sign Rthang Long Anusvara |
|`U+1CF1`   | Letter           | AVAGRAHA          | _null_                     | &#x1CF1; Sign Anusvara Ubhayato Mukha |
|`U+1CF2`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF2; Sign Ardhavisarga   |
|`U+1CF3`   | Letter           | CONSONANT_DEAD    | _null_                     | &#x1CF3; Sign Rotated Ardhavisarga |
|`U+1CF4`   | Mark [Mn]        | CANTILLATION      | TOP_POSITION               | &#x1CF4; Tone Candra Above   |
|`U+1CF5`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF5; Sign Jihvamuliya    |
|`U+1CF6`   | Letter           | CONSONANT_WITH_STACKER | _null_                | &#x1CF6; Sign Upadhmaniya    |
|`U+1CF7`   | Mark [Mc]        | _null_            | _null_                     | &#x1CF7; Sign Atikrama       |
|`U+1CF8`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF8; Tone Ring Above     |
|`U+1CF9`   | Mark [Mn]        | CANTILLATION      | _null_                     | &#x1CF9; Tone Double Ring Above |
|`U+1CFA`   | Letter           | PLACEHOLDER       | _null_                     | &#x1CFA; Sign Double Anusvara Antargomukha |
|`U+1CFB`   | _unassigned_     |                   |                            |                              |
|`U+1CFC`   | _unassigned_     |                   |                            |                              |
|`U+1CFD`   | _unassigned_     |                   |                            |                              |
|`U+1CFE`   | _unassigned_     |                   |                            |                              |
|`U+1CFF`   | _unassigned_     |                   |                            |                              |
:::


## Shaping information ##

29 of the characters in the block are categorized as marks. 27 of
these marks are subcategorized as non-spacing; the remaining two are
spacing-combining.

Of the non-spacing marks, 20 are classified as `CANTILLATION` (or tone-marker)
indicators, which modify the pitch of vowels. Most of these marks are
positioned above or below the main character, using <abbr title="Glyph Positioning table">GPOS</abbr>
mark attachment, in a position that does not interact or interfere
with the main character. In Unicode, the `CANTILLATION` classification
is kept separate, for semantic reasons, from the `TONE_MARKER`
classification used in some scripts; the two classifications are
identical for shaping purposes.

Some of the marks (cantillation and non-cantillation) are classified
as `OVERSTRUCK` in the _Mark-placement subclass_ column.
This indicates that the mark is intended to be rendered on top of the
preceding character. During reordering, `OVERSTRUCK` marks are tagged
for the ordering position `POS_AFTER_MAIN`.

Some marks are classified, for shaping purposes, as `AVAGRAHA` or
`VISARGA`. This indicates that the mark behaves more like the Avagraha
or Visarga character than like a diacritic.

Characters that are categorized in Unicode as letters vary with
respect to whether or not they trigger special behavior in the shaping
process. These include letters that are classified as `CONSONANT` and
letters that are classified as `AVAGRAHA`.


<!--- 1cf5 and 1cf6 get reclassified as CONSONANT

1ce2 and 1ce8 get treated like tone marks, but SHOULD be allowed only after Visarga.

1ced gets treated like tone mark, but SHOULD be allowed only after U+1CE9..U+1CF1

1ce9 1cec 1cee 1cf1 all take marks in standalone clusters, similar to Avagraha.
--->


================================================
FILE: overview.md
================================================
```{include} /index.md
```


================================================
FILE: test/spellcheck.yml
================================================
matrix:
- name: Main
  aspell:
    lang: en
  dictionary:
    encoding: utf-8
    wordlists:
    - 'test/wordlist.txt'
  pipeline:
  - pyspelling.filters.markdown:
      markdown_extensions:
      - markdown.extensions.tables
  - pyspelling.filters.html:
      comments: false
      attributes:
      - title
      - alt
      ignores:
      - code
      - pre
      - samp
      - abbr
      - :matches(.skip_spellcheck)
  sources:
  - '!character-tables/**/*.md|!images/**/*.md|**/*.md'
  default_encoding: utf-8
- name: Chartables
  aspell:
    lang: en
  dictionary:
    encoding: utf-8
    wordlists:
    - 'test/wordlist.txt'
  pipeline:
  - pyspelling.filters.markdown:
      markdown_extensions:
      - markdown.extensions.tables
  - pyspelling.filters.html:
      comments: false
      attributes:
      - title
      - alt
      ignores:
      - code
      - pre
      - samp
      - abbr
      - table
      - :matches(.skip_spellcheck)
  sources:
  - 'character-tables/*.md'
  default_encoding: utf-8


================================================
FILE: test/spellcheck_html.yml
================================================
matrix:
- name: HTML
  aspell:
    lang: en
  dictionary:
    encoding: utf-8
    wordlists:
    - 'test/wordlist.txt'
  pipeline:
  - pyspelling.filters.html:
      comments: false
      attributes:
      - title
      - alt
      ignores:
      - code
      - pre
      - samp
      - abbr
      - :matches(.skip_spellcheck)
  sources:
  - '_build/html/*.html'
  default_encoding: utf-8


================================================
FILE: test/wordlist.txt
================================================
AllSorts
AFA
AFD
AFE
AFF
AMTRA
BBE
BCA
BCB
BCC
BD
BaseC
BiDi
BLWF
BSO
BV
Bézier
Blobmoji
CBD
CBDT
CBE
CBF
CCA
CCB
CDA
CDB
CDD
CDE
CDF
CEA
CEB
CEC
CED
CEE
CEF
CEK
CFA
CFB
CFD
CFE
CFF
CGJ
CHA
CJK
CLDR
CMAbv
CMBlw
CN
COLR
COLRv
CPAL
CoreText
DCA
DCF
DDA
DDC
DDD
DDE
DDF
DDHA
DF
DHA
EB
EBC
ECB
ECD
EmojiTwo
FAF
FAbv
FB
FBlw
FC
FPst
FVS
FinalC
FirefoxEmoji
FontForge
FontTools
GDEF
GPOS
GSUB
HB
HN
HarfBuzz
HfG
IndependentVowel
JNya
JoyPixels
KA
KD
KSsa
LBase
LCount
LIndex
LLA
LRM
LTR
LV
LVIndex
LVT
lookupListOffset
MAbv
MBlw
MCM
MPre
MPst
MacOS
MultipleSub
MultipleSubst
N'Ko
NFD
NFKC
NFKD
NCount
NKo
NNA
NnTta
NUKTA
NUM
Openmoji
OpenType
POS
PRE
PUA
README
RGI
RLM
RTL
Ragel
Reddit
SBase
SCount
SDL
SHA
SIndex
SL
SMAbv
SMBlw
SMVD
SVG
Segoe
SkinTone
TBase
TCount
TIndex
TOCtree
TOCtrees
TRYo
TTX
UA
UCD
UCDM
UGC
UI
UIPC
UISC
UJT
Uniscribe
Uniscribe's
VAbv
VBase
VBlw
VCount
VIndex
VM
VMAbv
VMBlw
VMPre
VMPst
VPre
VPst
WJ
YAML
YesLogic
baseC
belowbaseC
featureListOffset
featureVariations
featureVariationsOffset
scriptListOffset
xA
xAA
xFB
ABOVEBASE
AFAICT
AKHAND
ALAPH
ANUSVARA
AVAGRAHA
Aa
Aaa
Abaric
Adak
Addak
Ahsda
Ai
Aira
Aiton
Akhand
Alaph
Alef
Antargomukha
Anudatta
Anusvara
Aq
Ardhavisarga
Asat
Atharavedic
Atikrama
Atthacan
Avagraha
Avestan
Ayin
BEH
BELOWBASE
BHA
BINDU
Bahirgomukha
Balti
Baluda
Bambara
Bangla
Bantoc
Baphala
Batak
Bathamasat
Bcad
Beh
Beyyal
Bhasha
Bheth
Bidirectionality
Bindi
Bindu
Brahmi
Brahmic
Buginese
Candra
Candrabindu
Cantillation
Catawa
Ccc
Chakma
Cham
Chandrabindu
Chillu
Choseong
Choseongul
Cia
Cn
Coeng
DALATH
Dagalga
Dagesh
Dalath
Damma
Dammatan
Dotless
Dyula
Dzongkha
Ee
Ek
Esṭrangēlā
Ethiopic
Etnahta
Fatha
Fathatan
GEMINATION
Gali
Garshuni
Gemination
Gmünd
Gondi
Grantha
Gurmukhi
Gurmukhi's
Halant
Halants
Hamza
Hanja
Hataf
Hathi
Hexiform
Hiriq
Holam
Ie
Ijam
IndependentVowel
Iri
Irula
Isan
Ja
Jamo
Jeongum
Jihvamuliya
Judezmo
Jungseong
Ka
Kai
Kakabat
Karshana
Kashida
Kashidas
Kashmiri
Kasra
Kasratan
Kathaka
Kayah
Ke
Kelantan
Keycap
Kha
Khamti
Khanda
Khmu
Kinzi
Kiyeok
Ko
Krung
Kufi
Kutchi
Kuy
Ladakhi
Lanna
Layar
Lepcha
Letterlike
Lf
Lm
Lookahead
Maayyaa
Maddah
Maitaikhu
Maithili
Majlīyānā
Manding
Maninka
Manipuri
Maḏnḥāyā
Mc
Midline
Modi
Mukha
Mx
Mynanmar
NIKHAHIT
Naskh
Nataliq
Nga
Niggahita
Nihshvasa
Nikahit
Nikhahit
Niqqud
Noto
Nukta
Nya
Nç
OVERSTRUCK
Odia
Oe
Oo
Overstruck
POSTBASE
PREBASE
Pak
Palaung
Pali
Paniya
Pao
Pashto
Patah
Pattani
Peh
Phags
Phinthu
Pho
Pictographic
Pre
Precomposed
Prenkha
Pwo
Qa
Qaa
Qaq
RAKAR
REPHA
RISH
Rafe
Rakaar
Rakaaraansaya
Raphala
Rbasa
Reahmuk
Recomposition
Repha
Rha
Rieul
Rigvedic
Rish
Rjes
Rnam
Ro
Robat
Rr
Rra
Rsv
Rthang
Rumai
Rumi
SHIFTER
STACKER
SYM
Samyok
Sannya
Sant
Saurashtra
Segol
Serṭā
Sgaw
Shadda
Shan
Shara
Sheva
Shifter
Sibe
Siddham
Sinhala
Sios
Slv
Slvt
Soyombo
Srog
Ssa
Ssangkiyeok
Stacker
Su
Sukun
Svarita
Syāmē
THA
TITLECASE
Tai
Tampuan
Tcomplex
Tham
Tifinagh
Tippi
Tiryak
Toandakhiat
Todo
Tri
Tsheng
Tta
Twemoji
Ubhayato
Udaat
Udatta
Upadhmaniya
Ura
Uu
VISARGA
Vamagomukha
Vattu
Vf
Viet
Viriam
Virama
Visarga
Vmain
Vpost
Wa
Xibe
YYA
Yakash
Yansaya
Yaphala
Yod
Yya
Zanabazar
Zapf
Zsye
Zsym
aa
aab
abvf
abvm
abvs
adak
advertized
akhn
al
algorithmically
anusvara
apf
appled
arab
arabic
artifically
asat
ascender
ascenders
bangjeom
baphala
barree
beng
bengali
bidirectionality
bindi
bindu
bitmask
blackflag
blwf
blwm
blws
bng
bugfixes
calt
candrabindu
cantillation
ccc
ccmp
ce
cec
ced
cee
cek
cfar
cff
chandrabindu
chandrakkala
chattawa
chillu
choseong
cjct
clig
cmap
codepoint
codepoints
codepoint's
coeng
coengs
compat
compatibilty
compatiblity
consonantmedial
consonantwithstacker
constitues
counterintuitive
cswh
dagesh
dalath
damma
dammatan
danda
decompositions
designator
dev
deva
devanagari
directionality
dlig
dotless
dottedcircle
eading
eg
emojimodified
encodings
endtag
fallbacks
familymember
fatha
fathatan
fe
featureListOffset
featureVariations
featureVariationsOffset
fi
fina
fitzpatrick
frac
funcs
fvs
gb
genderperson
gendersign
germany
ghunna
gjr
glyf
glyphs
grantha
grapheme
greyscale
gujarati
gujr
gur
gurmukhi
hal
halant
halanta
halantamu
halants
haln
haming
hamza
hangul
hardcoded
hasanta
hb
hebr
hebrew
hgher
hrasva
hç
ignorable
ignorables
ijam
implementer
implementers
indic
init
interesteed
interword
intrepreted
isol
jamo
jnya
jongseong
jungseong
júç
júð
ka
kannada
kar
kashida
kasra
kern
kerning
keycap
kha
khmer
khmr
kinzi
kirīma
kiyeok
knd
knda
kssa
lajanyalan
lakuna
lao
liga
ljmo
locl
lookahead
lookups
lv
lvt
mH
malayalam
matra
matras
matra's
matraabove
matrabelow
matrapost
matrapre
md
medi
mh
mis
mkmk
mlm
mlym
modifer
mong
mongolian
monnga
monospaced
morx
mr
mset
multipersongroup
multiplesub
mw
myanmar
mym
mymr
natively
nbsp
nga
niggahita
nikahit
nikhahit
niqqud
nirugu
nko
nntta
nonjoiner
notdef
nukt
nukta
occurence
occuring
opentype
oriya
ory
orya
ot
otc
otf
otl
overline
owels
pictographic
png
postprocess
pre
precomposed
preprocess
preprocessing
pstf
psts
pua
pulli
punc
py
ra
ragel
raphala
rclt
recomposition
recompositions
regionalindicator
registershifter
reodering
repaya
reph
repha
repositioned
repositions
rish
rkrf
rlig
robat
rphf
rumi
sala
samp
sanitization
sara
sbix
scriptListOffset
shadda
shaper
shapers
shaper's
shifter
shifters
sinf
sinh
sinhala
sios
skintone
sm
somefilename
somefontfilename
spl
srong
stch
str
subcategorized
subclasses
subclass's
subclassified
subsequence
subsequences
subst
subtag
subtags
svg
swara
syllablemodifier
syrc
syriac
tagchar
tamil
taml
targetting
tashdid
tatweel
telu
telugu
th
thai
thailao
thr
tibetan
tibt
tjmo
tml
tnum
tra
tsek
tsheng
ttc
ttf
ttx
un
uncategorized
unicode
unicodes
unioned
uniscribe
unneccessary
va
varation
vattu
vatu
vedic
vedicsign
virama
visarga
visiblity
vjmo
vrt
waw
yaphala
yeh
yya
zah
zwj
zwnj
ḥarakah