master a9b44fee2aa9 cached
178 files
53.3 MB
4.9M tokens
1173 symbols
1 requests
Copy disabled (too large) Download .txt
Showing preview only (19,566K chars total). Download the full file to get everything.
Repository: aws-samples/amazon-textract-response-parser
Branch: master
Commit: a9b44fee2aa9
Files: 178
Total size: 53.3 MB

Directory structure:
gitextract_tsv6824f/

├── .flake8
├── .github/
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── workflows/
│       └── test_pull_request.yml
├── .idea/
│   ├── amazon-textract-response-parser.iml
│   ├── inspectionProfiles/
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── src-csharp/
│   ├── LICENSE
│   ├── Program.cs
│   ├── README.md
│   ├── TextractExtensions.cs
│   ├── appsettings.json
│   └── parser.csproj
├── src-js/
│   ├── .eslintrc.js
│   ├── .nvmrc
│   ├── .prettierrc.js
│   ├── CHANGELOG.md
│   ├── README.md
│   ├── bin/
│   │   └── reading-order-diagnostic.js
│   ├── examples/
│   │   ├── README.md
│   │   ├── browser-iife/
│   │   │   ├── main.html
│   │   │   ├── main.js
│   │   │   ├── package.json
│   │   │   └── test.js
│   │   ├── nodejs-import/
│   │   │   ├── main.js
│   │   │   └── package.json
│   │   ├── nodejs-require/
│   │   │   ├── main.js
│   │   │   └── package.json
│   │   └── nodejs-typescript/
│   │       ├── package.json
│   │       ├── src/
│   │       │   └── main.ts
│   │       └── tsconfig.json
│   ├── jest.config.js
│   ├── package.json
│   ├── rollup.config.mjs
│   ├── src/
│   │   ├── api-models/
│   │   │   ├── base.ts
│   │   │   ├── content.ts
│   │   │   ├── document.ts
│   │   │   ├── expense.ts
│   │   │   ├── form.ts
│   │   │   ├── geometry.ts
│   │   │   ├── id.ts
│   │   │   ├── index.ts
│   │   │   ├── layout.ts
│   │   │   ├── query.ts
│   │   │   ├── response.ts
│   │   │   └── table.ts
│   │   ├── base.ts
│   │   ├── content.ts
│   │   ├── document.ts
│   │   ├── expense.ts
│   │   ├── form.ts
│   │   ├── geometry.ts
│   │   ├── id.ts
│   │   ├── index.ts
│   │   ├── layout.ts
│   │   ├── query.ts
│   │   └── table.ts
│   ├── test/
│   │   ├── data/
│   │   │   ├── analyzeid-test-drivers-license-response.json
│   │   │   ├── analyzeid-test-passport-response.json
│   │   │   ├── expense-missing-geoms-response.json
│   │   │   ├── financial-document-response.json
│   │   │   ├── form1005-response.json
│   │   │   ├── invoice-expense-response.json
│   │   │   ├── paystub-response.json
│   │   │   ├── table-example-response.json
│   │   │   ├── test-failed-response.json
│   │   │   ├── test-inprogress-response.json
│   │   │   ├── test-multicol-response-2.json
│   │   │   ├── test-multicol-response.json
│   │   │   ├── test-query-response.json
│   │   │   ├── test-response.json
│   │   │   └── test-twocol-header-footer-response.json
│   │   ├── integ/
│   │   │   └── aws-sdk.test.ts
│   │   ├── tsconfig.json
│   │   └── unit/
│   │       ├── api-models.test.ts
│   │       ├── base.test.ts
│   │       ├── content.test.ts
│   │       ├── corpus/
│   │       │   ├── header-footer.test.ts
│   │       │   └── reading-order.test.ts
│   │       ├── document.test.ts
│   │       ├── expense.test.ts
│   │       ├── form.test.ts
│   │       ├── geometry.test.ts
│   │       ├── id.test.ts
│   │       ├── index.test.ts
│   │       ├── layout.test.ts
│   │       ├── query.test.ts
│   │       └── table.test.ts
│   ├── tsconfig.browser.json
│   ├── tsconfig.cjs.json
│   ├── tsconfig.es.json
│   ├── tsconfig.json
│   └── tsconfig.types.json
└── src-python/
    ├── .style.yapf
    ├── .yapfignore
    ├── README.md
    ├── a2i/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── a2i-response.json
    │   ├── a2irp.py
    │   └── a2irptest.py
    ├── bin/
    │   └── amazon-textract-pipeline
    ├── extras/
    │   └── dev.txt
    ├── setup.cfg
    ├── setup.py
    ├── tests/
    │   ├── data/
    │   │   ├── 180-degree-roation.json
    │   │   ├── 2023-Q2-table-model-sample.json
    │   │   ├── all_features_with_floating_title_header.json
    │   │   ├── analyzeExpenseResponse-multipage.json
    │   │   ├── bounding_box_issue.json
    │   │   ├── employment-application.json
    │   │   ├── gib.json
    │   │   ├── gib1.json
    │   │   ├── gib_10_degrees.json
    │   │   ├── gib__10_degrees.json
    │   │   ├── gib__15_degrees.json
    │   │   ├── gib__180_degrees.json
    │   │   ├── gib__25_degrees.json
    │   │   ├── gib__270_degrees.json
    │   │   ├── gib__90_degrees.json
    │   │   ├── gib__minus_10_degrees.json
    │   │   ├── gib_multi_page_table_merge.json
    │   │   ├── gib_multi_page_tables.json
    │   │   ├── gib_multi_tables_multi_page_sample.json
    │   │   ├── in-table-footer.json
    │   │   ├── in-table-title.json
    │   │   ├── issue_83.json
    │   │   ├── lending-doc-output.json
    │   │   ├── lending-package-no-signature.json
    │   │   ├── little_women_page_1.json
    │   │   ├── multi-page-forms-samples-2-page.json
    │   │   ├── multi-tables-multi-page-sample.json
    │   │   ├── patient_intake_form_sample.json
    │   │   ├── paystub_with_signature.json
    │   │   ├── queries_sample.json
    │   │   ├── request_for_verification_of_employment.json
    │   │   ├── table-performance-pretty.json
    │   │   ├── tables_with_headers_and_merged_cells.json
    │   │   ├── tables_with_headers_out_of_order_cells.json
    │   │   ├── tables_with_merged_cells_sample1.json
    │   │   ├── tables_with_merged_cells_sample2.json
    │   │   ├── test-trp2-analyzeid_sample_multi_page.json
    │   │   ├── test-trp2_analyzeid_sample1.json
    │   │   ├── test-trp2_analyzeid_sample1_with_OCR.json
    │   │   ├── test-trp2_analyzeid_sample2.json
    │   │   ├── test_table_merged_text.json
    │   │   ├── test_trp2_expense_sample1.json
    │   │   ├── test_trp2_expense_sample2.json
    │   │   ├── test_trp2_expense_sample3.json
    │   │   ├── test_trp2_expense_sample4.json
    │   │   └── textract-new-tables-api.json
    │   ├── test-response.json
    │   ├── test_base_trp2.py
    │   ├── test_merged.py
    │   ├── test_t_tables.py
    │   ├── test_trp.py
    │   ├── test_trp2.py
    │   ├── test_trp2_analyzeid.py
    │   ├── test_trp2_expense.py
    │   └── test_trp2_lending.py
    ├── textract-mapping/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── loan-app-response.json
    │   ├── mapping-response.json
    │   ├── mapping.py
    │   └── mappingtest.py
    └── trp/
        ├── __init__.py
        ├── t_pipeline.py
        ├── t_tables.py
        ├── trp2.py
        ├── trp2_analyzeid.py
        ├── trp2_expense.py
        └── trp2_lending.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .flake8
================================================
[flake8]
ignore = E501,W503


================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
*Issue #, if available:*

*Description of changes:*


By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.


================================================
FILE: .github/workflows/test_pull_request.yml
================================================
# Controls when the action will run. Triggers the workflow on pull request
# events that change files under the src-python folder, or on manual dispatch.
name: Test-Pull-Request
on:
  pull_request:
    types: [assigned, opened, synchronize, reopened]
    paths:
      # A bare "src-python" only matches a path that is exactly "src-python";
      # the "/**" glob is required to match files inside the folder.
      - "src-python/**"

  # Allows the workflow to be started manually.
  workflow_dispatch:

# Run the tests
jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11"]
    defaults:
      run:
        # Every `run` step below executes from the Python package folder.
        working-directory: ./src-python

    steps:
      # Checks out the repository
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      # Install package locally
      - name: Install package
        run: pip install -e .

      # Install dev dependencies
      - name: Install dependencies
        run: pip install -r ./extras/dev.txt

      # Run tests
      - name: Test
        run: pytest


================================================
FILE: .idea/amazon-textract-response-parser.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
      <excludeFolder url="file://$MODULE_DIR$/venv" />
    </content>
    <orderEntry type="jdk" jdkName="Python 3.8 (venv)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PyDocumentationSettings">
    <option name="format" value="EPYTEXT" />
    <option name="myDocStringFormat" value="Epytext" />
  </component>
  <component name="TestRunnerService">
    <option name="PROJECT_TEST_RUNNER" value="py.test" />
  </component>
</module>

================================================
FILE: .idea/inspectionProfiles/profiles_settings.xml
================================================
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>

================================================
FILE: .idea/misc.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (venv)" project-jdk-type="Python SDK" />
</project>

================================================
FILE: .idea/modules.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/amazon-textract-response-parser.iml" filepath="$PROJECT_DIR$/.idea/amazon-textract-response-parser.iml" />
    </modules>
  </component>
</project>

================================================
FILE: .idea/vcs.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>

================================================
FILE: CODE_OF_CONDUCT.md
================================================
## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing Guidelines

Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
documentation, we greatly value feedback and contributions from our community.

Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
information to effectively respond to your bug report or contribution.


## Reporting Bugs/Feature Requests

We welcome you to use the GitHub issue tracker to report bugs or suggest features.

When filing an issue, please check [existing open](https://github.com/aws-samples/amazon-textract-response-parser/issues), or [recently closed](https://github.com/aws-samples/amazon-textract-response-parser/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already
reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:

* A reproducible test case or series of steps
* The version of our code being used
* Any modifications you've made relevant to the bug
* Anything unusual about your environment or deployment


## Contributing via Pull Requests
Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:

1. You are working against the latest source on the *master* branch.
2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
3. You open an issue to discuss any significant work - we would hate for your time to be wasted.

To send us a pull request, please:

1. Fork the repository.
2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
3. Ensure local tests pass.
4. Commit to your fork using clear commit messages.
5. Send us a pull request, answering any default questions in the pull request interface.
6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.

GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).


## Finding contributions to work on
Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-samples/amazon-textract-response-parser/labels/help%20wanted) issues is a great place to start.


## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.


## Security issue notifications
If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.


## Licensing

See the [LICENSE](https://github.com/aws-samples/amazon-textract-response-parser/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.

We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes.


================================================
FILE: LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.


================================================
FILE: README.md
================================================
# Textract Response Parser

You can use the Textract response parser library to easily parse the JSON returned by Amazon Textract. The library parses the JSON and provides programming-language-specific constructs for working with the different parts of the document. [textractor](https://github.com/aws-samples/amazon-textract-textractor) is an example of a PoC batch processing tool that takes advantage of the Textract response parser library and generates output in multiple formats.

## Python Usage

For documentation on usage see: [src-python/README.md](src-python/README.md)

## JavaScript/TypeScript Usage

For documentation on usage see: [src-js/README.md](src-js/README.md)

## C# Usage

### Forms

```csharp
document.Pages.ForEach(page => {
    Console.WriteLine("Print Lines and Words:");
    page.Lines.ForEach(line => {
        Console.WriteLine("{0}--{1}", line.Text, line.Confidence);
        line.Words.ForEach(word => {
            Console.WriteLine("{0}--{1}", word.Text, word.Confidence);
        });
    });
    Console.WriteLine("Print Fields:");
    page.Form.Fields.ForEach(f => {
        Console.WriteLine("Field: Key: {0}, Value {1}", f.Key, f.Value);
    });
    Console.WriteLine("Get Field by Key:");
    var key = "Phone Number:";
    var field = page.Form.GetFieldByKey(key);
    if(field != null) {
        Console.WriteLine("Field: Key: {0}, Value: {1}", field.Key, field.Value);
    }
});
```

### Tables

```csharp
document.Pages.ForEach(page => {
    page.Tables.ForEach(table => {
        var r = 0;
        table.Rows.ForEach(row => {
            r++;
            var c = 0;
            row.Cells.ForEach(cell => {
                c++;
                Console.WriteLine("Table [{0}][{1}] = {2}--{3}", r, c, cell.Text, cell.Confidence);
            });
        });
    });
});
```

Check out the `src-csharp` folder for instructions on how to run the [.NET Core C#](src-csharp/README.md) samples.

## Other Resources

- [Large scale document processing with Amazon Textract - Reference Architecture](https://github.com/aws-samples/amazon-textract-serverless-large-scale-document-processing)
- [Batch processing tool](https://github.com/aws-samples/amazon-textract-textractor)
- [Code samples](https://github.com/aws-samples/amazon-textract-code-samples)

## License Summary

This sample code is made available under the Apache License, Version 2.0. See the LICENSE file.



================================================
FILE: src-csharp/LICENSE
================================================
Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

================================================
FILE: src-csharp/Program.cs
================================================
using System;
using Microsoft.Extensions.Configuration;
using Amazon.Textract;
using Amazon.Textract.Model;
using System.Collections.Generic;
using System.Threading.Tasks;

namespace parser {
	/// <summary>
	/// Sample console application: starts Amazon Textract analysis jobs for a file in S3,
	/// waits for them to finish and prints the parsed lines, words, form fields and table cells.
	/// </summary>
	class Program {
		const string BucketName = "<your-bucket-name>";
		const string FormFile = "employmentapp.png";

		/// <summary>
		/// Entry point. Analyses the sample file once with the "FORMS" feature and once with
		/// the "TABLES" feature, printing the results of each pass to the console.
		/// </summary>
		static void Main(string[] args) {
			var client = BuildTextractClient();

			// First pass: text and form fields.
			var formsDocument = PrepareDocument(client, "FORMS");
			foreach(var page in formsDocument.Pages) {
				PrintTextAndFields(page);
			}

			// Second pass: tables of the same file.
			var tablesDocument = PrepareDocument(client, "TABLES");
			foreach(var page in tablesDocument.Pages) {
				PrintTables(page);
			}
		}

		/// <summary>
		/// Prints every line (with its words), every form field, and a lookup of the
		/// "Phone Number:" field for a single page.
		/// </summary>
		static void PrintTextAndFields(Page page) {
			Console.WriteLine("Print Lines and Words:");
			foreach(var line in page.Lines) {
				Console.WriteLine("{0}--{1}", line.Text, line.Confidence);
				foreach(var word in line.Words) {
					Console.WriteLine("{0}--{1}", word.Text, word.Confidence);
				}
			}

			Console.WriteLine("Print Fields:");
			foreach(var f in page.Form.Fields) {
				Console.WriteLine("Field: Key: {0}, Value {1}", f.Key, f.Value);
			}

			Console.WriteLine("Get Field by Key:");
			var match = page.Form.GetFieldByKey("Phone Number:");
			if(match != null) {
				Console.WriteLine("Field: Key: {0}, Value: {1}", match.Key, match.Value);
			}
		}

		/// <summary>
		/// Prints every cell of every table on the page, using 1-based row and column counters.
		/// </summary>
		static void PrintTables(Page page) {
			foreach(var table in page.Tables) {
				for(var r = 0; r < table.Rows.Count; r++) {
					var cells = table.Rows[r].Cells;
					for(var c = 0; c < cells.Count; c++) {
						Console.WriteLine("Table [{0}][{1}] = {2}--{3}", r + 1, c + 1, cells[c].Text, cells[c].Confidence);
					}
				}
			}
		}

		/// <summary>
		/// Starts an analysis job for the sample file with the given feature type, blocks until
		/// the job is no longer in progress, and wraps the fetched results in a TextractDocument.
		/// </summary>
		static TextractDocument PrepareDocument(TextractTextAnalysisService textractAnalysisClient, string type) {
			// Reading .Result blocks the calling thread until the job id is available.
			var jobId = textractAnalysisClient.StartDocumentAnalysis(BucketName, FormFile, type).Result;
			textractAnalysisClient.WaitForJobCompletion(jobId);
			return new TextractDocument(textractAnalysisClient.GetJobResults(jobId));
		}

		/// <summary>
		/// Builds the Textract service wrapper from appsettings.json (required) plus environment variables.
		/// </summary>
		static TextractTextAnalysisService BuildTextractClient() {
			var configurationBuilder = new ConfigurationBuilder();
			configurationBuilder.SetBasePath(Environment.CurrentDirectory);
			configurationBuilder.AddJsonFile("appsettings.json", optional: false, reloadOnChange: true);
			configurationBuilder.AddEnvironmentVariables();
			var configuration = configurationBuilder.Build();

			var awsOptions = configuration.GetAWSOptions();
			var textract = awsOptions.CreateServiceClient<IAmazonTextract>();
			return new TextractTextAnalysisService(textract);
		}
	}

	/// <summary>
	/// Small helper around <c>IAmazonTextract</c> for starting an asynchronous document-analysis
	/// job, polling it, and fetching its results.
	/// </summary>
	public class TextractTextAnalysisService {
		private IAmazonTextract textract;

		public TextractTextAnalysisService(IAmazonTextract textract) {
			this.textract = textract;
		}

		/// <summary>
		/// Fetches the GetDocumentAnalysis response for the given job, blocking until the call returns.
		/// </summary>
		public GetDocumentAnalysisResponse GetJobResults(string jobId) {
			return this.FetchAnalysis(jobId);
		}

		/// <summary>
		/// Returns true when the job status reported by the service is anything other than "IN_PROGRESS".
		/// </summary>
		public bool IsJobComplete(string jobId) {
			var status = this.FetchAnalysis(jobId).JobStatus;
			return !status.Equals("IN_PROGRESS");
		}

		/// <summary>
		/// Starts an analysis job for an S3 object with a single feature type and returns the job id.
		/// </summary>
		public async Task<string> StartDocumentAnalysis(string bucketName, string key, string featureType) {
			var request = new StartDocumentAnalysisRequest {
				DocumentLocation = new DocumentLocation {
					S3Object = new S3Object {
						Bucket = bucketName,
						Name = key
					}
				},
				FeatureTypes = new List<string> { featureType }
			};
			var started = await this.textract.StartDocumentAnalysisAsync(request);
			return started.JobId;
		}

		/// <summary>
		/// Polls the job until <see cref="IsJobComplete"/> returns true, pausing
		/// <paramref name="delay"/> milliseconds between polls.
		/// </summary>
		public void WaitForJobCompletion(string jobId, int delay = 5000) {
			while(true) {
				if(IsJobComplete(jobId)) {
					break;
				}
				this.Wait(delay);
			}
		}

		// Issues a GetDocumentAnalysis call and blocks on the returned task.
		private GetDocumentAnalysisResponse FetchAnalysis(string jobId) {
			var pending = this.textract.GetDocumentAnalysisAsync(new GetDocumentAnalysisRequest {
				JobId = jobId
			});
			pending.Wait();
			return pending.Result;
		}

		// Blocks for the given number of milliseconds, then prints a progress dot.
		private void Wait(int delay = 5000) {
			Task.Delay(delay).Wait();
			Console.Write(".");
		}
	}
}


================================================
FILE: src-csharp/README.md
================================================
# Usage

## Forms

```csharp
document.Pages.ForEach(page => {
    Console.WriteLine("Print Lines and Words:");
    page.Lines.ForEach(line => {
        Console.WriteLine("{0}--{1}", line.Text, line.Confidence);
        line.Words.ForEach(word => {
            Console.WriteLine("{0}--{1}", word.Text, word.Confidence);
        });
    });
    Console.WriteLine("Print Fields:");
    page.Form.Fields.ForEach(f => {
        Console.WriteLine("Field: Key: {0}, Value {1}", f.Key, f.Value);
    });
    Console.WriteLine("Get Field by Key:");
    var key = "Phone Number:";
    var field = page.Form.GetFieldByKey(key);
    if(field != null) {
        Console.WriteLine("Field: Key: {0}, Value: {1}", field.Key, field.Value);
    }
});
```

## Tables

```csharp
document.Pages.ForEach(page => {
    page.Tables.ForEach(table => {
        var r = 0;
        table.Rows.ForEach(row => {
            r++;
            var c = 0;
            row.Cells.ForEach(cell => {
                c++;
                Console.WriteLine("Table [{0}][{1}] = {2}--{3}", r, c, cell.Text, cell.Confidence);
            });
        });
    });
});
```

# Test

## Prerequisites

- [Install](https://dotnet.microsoft.com/download) .NET Core
- [Install](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html)
  and
  [Configure](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html)
  AWS CLI

Then

- Download source code to your local machine
- Run the following at a command line inside the source code folder to execute

```
dotnet run
```

# Extra

Upload the sample file to S3:

```
aws s3 cp test-files/employmentapp.png s3://<your-s3-bucket>
```


================================================
FILE: src-csharp/TextractExtensions.cs
================================================
using System;
using System.Collections.Generic;

namespace Amazon.Textract.Model {

	/// <summary>
	/// Wrapper around a Textract WORD block exposing its text, confidence, geometry and id.
	/// </summary>
	public class Word {
		/// <summary>
		/// Creates a word from the given block.
		/// </summary>
		/// <param name="block">The source block. May be null (for example when a referenced id
		/// could not be resolved); the word then has empty text and default values.</param>
		/// <param name="blocks">All known blocks; not used by this constructor.</param>
		public Word(Block block, List<Block> blocks) {
			this.Block = block;
			// The null check must happen before any property of the block is read,
			// otherwise the fallback to an empty text can never be reached.
			if(block == null) {
				this.Text = string.Empty;
				return;
			}
			this.Confidence = block.Confidence;
			this.Geometry = block.Geometry;
			this.Id = block.Id;
			this.Text = block.Text;
		}

		public Block Block { get; set; }
		public float Confidence { get; set; }
		public Geometry Geometry { get; set; }
		public string Id { get; set; }
		public string Text { get; set; }

		/// <summary>Returns the word text.</summary>
		public override string ToString() {
			return Text;
		}
	}

	/// <summary>
	/// Parsed view of a GetDocumentAnalysis response: groups the response blocks into pages
	/// and builds a <see cref="Page"/> object for each group.
	/// </summary>
	public class TextractDocument {
		// Flat list of every block seen, used for id lookups.
		private List<Block> blockMap = new List<Block>();
		// Blocks grouped per page; a new group starts at each PAGE block.
		List<List<Block>> documentPages = new List<List<Block>>();

		/// <summary>
		/// Parses the given response into pages.
		/// </summary>
		/// <param name="response">A GetDocumentAnalysis response.</param>
		public TextractDocument(GetDocumentAnalysisResponse response) {
			this.Pages = new List<Page>();
			this.ResponsePages = new List<GetDocumentAnalysisResponse>();
			this.ResponsePages.Add(response);

			this.ParseDocumentPagesAndBlockMap();
			this.Parse();
		}

		// Walks all response blocks in order, recording each in the block list and
		// splitting them into per-page groups at every PAGE block.
		private void ParseDocumentPagesAndBlockMap() {
			List<Block> documentPage = null;
			foreach(var responsePage in this.ResponsePages) {
				if(responsePage.Blocks == null) {
					continue;
				}
				foreach(var block in responsePage.Blocks) {
					this.blockMap.Add(block);

					if(block.BlockType == "PAGE") {
						// A PAGE block closes the previous group (if any) and opens a new one.
						if(documentPage != null) {
							this.documentPages.Add(documentPage);
						}
						documentPage = new List<Block>();
					} else if(documentPage == null) {
						// Blocks arriving before any PAGE block used to hit a null list;
						// collect them in an implicit first group instead.
						documentPage = new List<Block>();
					}
					documentPage.Add(block);
				}
			}

			if(documentPage != null) {
				this.documentPages.Add(documentPage);
			}
		}

		// Builds one Page per block group.
		private void Parse() {
			foreach(var documentPage in this.documentPages) {
				this.Pages.Add(new Page(documentPage, this.blockMap));
			}
		}

		/// <summary>
		/// Returns the first block with the given id, or null when none matches.
		/// </summary>
		public Block GetBlockById(string blockId) {
			return this.blockMap.Find(x => x.Id == blockId);
		}

		public List<GetDocumentAnalysisResponse> ResponsePages { get; set; }
		public List<Page> Pages { get; set; }
		public List<List<Block>> PageBlocks {
			get {
				return this.documentPages;
			}
		}
	}

	/// <summary>
	/// Wrapper around a Textract TABLE block that groups its child cells into rows.
	/// </summary>
	public class Table {
		/// <summary>
		/// Builds the table rows from the CHILD relationships of the block.
		/// </summary>
		/// <param name="block">The TABLE block.</param>
		/// <param name="blocks">All known blocks, used to resolve child cell ids.</param>
		public Table(Block block, List<Block> blocks) {
			this.Block = block;
			this.Confidence = block.Confidence;
			this.Geometry = block.Geometry;
			this.Id = block.Id;
			this.Rows = new List<Row>();
			var currentRowIndex = 1;
			var currentRow = new Row();

			var relationships = block.Relationships;
			if(relationships != null) {
				foreach(var relationship in relationships) {
					if(relationship.Type == "CHILD") {
						foreach(var id in relationship.Ids) {
							var cellBlock = blocks.Find(b => b.Id == id);
							if(cellBlock == null) {
								// The referenced cell is not among the known blocks; skip it
								// rather than failing inside the Cell constructor.
								continue;
							}
							var cell = new Cell(cellBlock, blocks);
							// Cells are consumed in relationship order: a higher row index
							// closes the current row and starts the next one.
							if(cell.RowIndex > currentRowIndex) {
								this.Rows.Add(currentRow);
								currentRow = new Row();
								currentRowIndex = cell.RowIndex;
							}
							currentRow.Cells.Add(cell);
						}
					}
				}
			}

			// Flush the trailing row exactly once, after every relationship has been read,
			// so the same Row instance can never be appended twice.
			if(currentRow.Cells.Count > 0) {
				this.Rows.Add(currentRow);
			}
		}
		public List<Row> Rows { get; set; }
		public Block Block { get; set; }
		public float Confidence { get; set; }
		public Geometry Geometry { get; set; }
		public string Id { get; set; }

		/// <summary>
		/// Returns a multi-line dump: a "Table" header followed by one "Row" section per row.
		/// </summary>
		public override string ToString() {
			var result = new List<string>();
			result.Add(string.Format("Table{0}===={0}", Environment.NewLine));
			this.Rows.ForEach(r => {
				result.Add(string.Format("Row{0}===={0}{1}{0}", Environment.NewLine, r));
			});
			return string.Join("", result);
		}
	}

	/// <summary>
	/// Wrapper around a Textract SELECTION_ELEMENT block, exposing its id, geometry,
	/// confidence and selection status.
	/// </summary>
	public class SelectionElement {
		public string Id { get; set; }
		public Geometry Geometry { get; set; }
		public float Confidence { get; set; }
		public string SelectionStatus { get; set; }

		/// <summary>
		/// Copies the relevant properties from the block.
		/// </summary>
		/// <param name="block">The SELECTION_ELEMENT block.</param>
		/// <param name="blocks">All known blocks; not used by this constructor.</param>
		public SelectionElement(Block block, List<Block> blocks) {
			Confidence = block.Confidence;
			Geometry = block.Geometry;
			Id = block.Id;
			SelectionStatus = block.SelectionStatus;
		}
	}

	/// <summary>
	/// A table row: an ordered list of cells.
	/// </summary>
	public class Row {
		public List<Cell> Cells { get; set; }

		/// <summary>Creates a row with no cells.</summary>
		public Row() {
			Cells = new List<Cell>();
		}

		/// <summary>
		/// Returns every cell wrapped in square brackets, concatenated without separators.
		/// </summary>
		public override string ToString() {
			var text = string.Empty;
			foreach(var cell in Cells) {
				text += string.Format("[{0}]", cell);
			}
			return text;
		}
	}

	/// <summary>
	/// One page of a parsed document: its lines, form fields and tables, plus helpers for
	/// reading the lines column by column.
	/// </summary>
	public class Page {
		/// <summary>
		/// Builds the page from its blocks.
		/// </summary>
		/// <param name="blocks">The blocks that belong to this page.</param>
		/// <param name="blockMap">All known blocks, used to resolve relationship ids.</param>
		public Page(List<Block> blocks, List<Block> blockMap) {
			this.Blocks = blocks;
			this.Text = string.Empty;
			this.Lines = new List<Line>();
			this.Form = new Form();
			this.Tables = new List<Table>();
			this.Content = new List<dynamic>();

			foreach(var b in blocks) {
				if(b.BlockType == "PAGE") {
					this.Geometry = new NewGeometry(b.Geometry);
					this.Id = b.Id;
				} else if(b.BlockType == "LINE") {
					var l = new Line(b, blockMap);
					this.Lines.Add(l);
					this.Content.Add(l);
					this.Text = this.Text + l.Text + Environment.NewLine;
				} else if(b.BlockType == "TABLE") {
					var t = new Table(b, blockMap);
					this.Tables.Add(t);
					this.Content.Add(t);
				} else if(b.BlockType == "KEY_VALUE_SET") {
					// Only KEY blocks start a field; the matching VALUE block is resolved by Field.
					if(b.EntityTypes.Contains("KEY")) {
						var f = new Field(b, blockMap);
						if(f.Key != null) {
							this.Form.AddField(f);
							this.Content.Add(f);
						}
					}
				}
			}
		}

		/// <summary>
		/// Assigns every line to a column based on horizontal overlap and returns the lines
		/// grouped by column: all lines of column 0 first, then column 1, and so on. Within a
		/// column the original line order is preserved.
		/// </summary>
		/// <remarks>
		/// A line joins the first existing column whose span contains the line's centre, or
		/// whose centre lies inside the line's span; otherwise it opens a new column.
		/// This method does not write to the console.
		/// </remarks>
		/// <returns>The lines tagged with their column index, in column-major order.</returns>
		public List<IndexedText> GetLinesInReadingOrder() {
			var columns = new List<Column>();
			// One bucket per column keeps the grouping stable without needing a sort.
			var linesByColumn = new List<List<IndexedText>>();

			foreach(var line in this.Lines) {
				var bb = line.Geometry.BoundingBox;
				var bbLeft = bb.Left;
				var bbRight = bb.Left + bb.Width;
				var bbCentre = bb.Left + (bb.Width / 2);

				var columnIndex = -1;
				for(var index = 0; index < columns.Count; index++) {
					var column = columns[index];
					// Right is an absolute coordinate, so the midpoint is Left plus half the width.
					var columnCentre = column.Left + ((column.Right - column.Left) / 2);

					if((bbCentre > column.Left && bbCentre < column.Right) || (columnCentre > bbLeft && columnCentre < bbRight)) {
						columnIndex = index;
						break;
					}
				}

				if(columnIndex < 0) {
					columns.Add(new Column { Left = bbLeft, Right = bbRight });
					linesByColumn.Add(new List<IndexedText>());
					columnIndex = columns.Count - 1;
				}

				linesByColumn[columnIndex].Add(new IndexedText { ColumnIndex = columnIndex, Text = line.Text });
			}

			var lines = new List<IndexedText>();
			foreach(var columnLines in linesByColumn) {
				lines.AddRange(columnLines);
			}
			return lines;
		}

		/// <summary>
		/// Returns the text of <see cref="GetLinesInReadingOrder"/>, one line per "\n"-terminated row.
		/// </summary>
		public string GetTextInReadingOrder() {
			var lines = this.GetLinesInReadingOrder();
			var text = string.Empty;
			lines.ForEach(line => {
				text = text + line.Text + "\n";
			});
			return text;
		}


		public List<Block> Blocks { get; set; }
		public string Text { get; set; }
		public List<Line> Lines { get; set; }
		public Form Form { get; set; }
		public List<Table> Tables { get; set; }
		public List<dynamic> Content { get; set; }
		public Geometry Geometry { get; set; }
		public string Id { get; set; }

		/// <summary>
		/// Returns a "Page" header followed by the string form of each content item.
		/// </summary>
		public override string ToString() {
			var result = new List<string>();
			result.Add(string.Format("Page{0}===={0}", Environment.NewLine));
			this.Content.ForEach(c => {
				result.Add(string.Format("{1}{0}", Environment.NewLine, c));
			});
			return string.Join("", result);
		}

		/// <summary>Horizontal span of a text column, as absolute left and right coordinates.</summary>
		public class Column {
			public float Left { get; set; }
			public float Right { get; set; }

			public override string ToString() {
				return string.Format("Left: {0}, Right :{1}", this.Left, this.Right);
			}
		}

		/// <summary>A line of text tagged with the index of the column it was assigned to.</summary>
		public class IndexedText {
			public int ColumnIndex { get; set; }
			public string Text { get; set; }

			public override string ToString() {
				return string.Format("[{0}] {1}", this.ColumnIndex, this.Text);
			}
		}
	}

	/// <summary>
	/// A <see cref="Geometry"/> whose bounding box and polygon points are copies of the
	/// source geometry's values, with a readable string form.
	/// </summary>
	public class NewGeometry : Geometry {

		/// <summary>
		/// Copies the bounding box (as a <see cref="NewBoundingBox"/>) and every polygon point
		/// of the given geometry.
		/// </summary>
		public NewGeometry(Geometry geometry) : base() {
			var sourceBox = geometry.BoundingBox;
			var sourcePolygon = geometry.Polygon;

			var copiedBox = new NewBoundingBox(sourceBox.Width, sourceBox.Height, sourceBox.Left, sourceBox.Top);

			var copiedPoints = new List<Point>();
			foreach(var point in sourcePolygon) {
				copiedPoints.Add(new Point {
					X = point.X,
					Y = point.Y
				});
			}

			this.BoundingBox = copiedBox;
			this.Polygon = copiedPoints;
		}

		/// <summary>Returns the bounding box description followed by a newline.</summary>
		public override string ToString() {
			return string.Format("BoundingBox: {0}{1}", this.BoundingBox, Environment.NewLine);
		}


	}

	/// <summary>
	/// A <see cref="BoundingBox"/> that is initialised from explicit values and has a readable string form.
	/// </summary>
	public class NewBoundingBox : BoundingBox {
		/// <summary>Creates a bounding box with the given size and position.</summary>
		public NewBoundingBox(float width, float height, float left, float top) : base() {
			Left = left;
			Top = top;
			Width = width;
			Height = height;
		}

		/// <summary>Returns the width, height, left and top values as labelled text.</summary>
		public override string ToString() {
			const string template = "width: {0}, height: {1}, left: {2}, top: {3}";
			return string.Format(template, this.Width, this.Height, this.Left, this.Top);
		}
	}

	/// <summary>
	/// Wrapper around a Textract LINE block and the WORD blocks it references.
	/// </summary>
	public class Line {
		/// <summary>
		/// Builds the line and resolves its child words.
		/// </summary>
		/// <param name="block">The LINE block; must not be null.</param>
		/// <param name="blocks">All known blocks, used to resolve child word ids.</param>
		public Line(Block block, List<Block> blocks) {
			this.Block = block;
			this.Confidence = block.Confidence;
			this.Geometry = block.Geometry;
			this.Id = block.Id;
			this.Text = block.Text;
			this.Words = new List<Word>();

			var relationships = block.Relationships;
			if(relationships != null) {
				foreach(var relationship in relationships) {
					if(relationship.Type == "CHILD") {
						foreach(var id in relationship.Ids) {
							var wordBlock = blocks.Find(b => b.BlockType == "WORD" && b.Id == id);
							// Find returns null when the id does not resolve to a WORD block;
							// skip such children instead of constructing a word from null.
							if(wordBlock != null) {
								this.Words.Add(new Word(wordBlock, blocks));
							}
						}
					}
				}
			}
		}

		public float Confidence { get; set; }
		public Geometry Geometry { get; set; }
		public string Id { get; set; }
		public List<Word> Words { get; set; }
		public string Text { get; set; }
		public Block Block { get; set; }

		/// <summary>
		/// Returns a multi-line dump with the line text followed by its comma-separated words.
		/// </summary>
		public override string ToString() {
			return string.Format(@"
                Line{0}===={0}
                {1} {0}
                Words{0}----{0}
                {2}{0}
                ----
            ", Environment.NewLine, this.Text, string.Join(", ", this.Words));
		}
	}

	/// <summary>
	/// The form fields of a page, kept both as an ordered list and as a lookup by key text.
	/// </summary>
	public class Form {
		public List<Field> Fields { get; set; }
		// Key text -> field. When several fields share the same key text, the last one added wins.
		private Dictionary<string, Field> fieldMap;

		public Form() {
			this.Fields = new List<Field>();
			this.fieldMap = new Dictionary<string, Field>();
		}

		/// <summary>
		/// Adds a field to the list and registers it under its key text.
		/// </summary>
		/// <param name="field">The field to add; its Key must not be null.</param>
		public void AddField(Field field) {
			this.Fields.Add(field);
			// Use the indexer rather than Add: documents often repeat a key text, and
			// Dictionary.Add would throw ArgumentException on the second occurrence.
			this.fieldMap[field.Key.ToString()] = field;
		}

		/// <summary>
		/// Returns the field registered under exactly this key text, or null when there is none.
		/// </summary>
		public Field GetFieldByKey(string key) {
			return this.fieldMap.GetValueOrDefault(key);
		}

		/// <summary>
		/// Returns every field whose key text contains <paramref name="key"/>, compared after lower-casing both.
		/// </summary>
		public List<Field> SearchFieldsByKey(string key) {
			// Lower-case the search term once instead of once per field.
			var needle = key.ToLower();
			return this.Fields.FindAll(f => f.Key.ToString().ToLower().Contains(needle));
		}

		/// <summary>Returns the string form of every field, separated by "\n".</summary>
		public override string ToString() {
			return string.Join("\n", this.Fields);
		}
	}

	/// <summary>
	/// The value side of a form field: the words and selection elements referenced by a VALUE block.
	/// </summary>
	public class FieldValue {
		/// <summary>
		/// Builds the value from the given block and child ids.
		/// </summary>
		/// <param name="block">The VALUE block.</param>
		/// <param name="children">Ids of the child blocks that make up the value; may be null.</param>
		/// <param name="blocks">All known blocks, used to resolve the child ids.</param>
		public FieldValue(Block block, List<string> children, List<Block> blocks) {
			this.Block = block;
			this.Confidence = block.Confidence;
			this.Geometry = block.Geometry;
			this.Id = block.Id;
			this.Text = string.Empty;
			this.Content = new List<dynamic>();

			var parts = new List<string>();
			if(children != null) {
				foreach(var childId in children) {
					var child = blocks.Find(b => b.Id == childId);
					if(child == null) {
						// The id does not resolve to a known block; ignore it rather than
						// dereferencing null.
						continue;
					}
					if(child.BlockType == "WORD") {
						var w = new Word(child, blocks);
						this.Content.Add(w);
						parts.Add(w.Text);
					} else if(child.BlockType == "SELECTION_ELEMENT") {
						var selection = new SelectionElement(child, blocks);
						this.Content.Add(selection);
						parts.Add(selection.SelectionStatus);
					}
				}
			}

			if(parts.Count > 0) {
				this.Text = string.Join(" ", parts);
			}
		}
		public List<dynamic> Content { get; set; }
		public Block Block { get; set; }
		public float Confidence { get; set; }
		public Geometry Geometry { get; set; }
		public string Id { get; set; }
		public string Text { get; set; }

		/// <summary>Returns the space-joined value text.</summary>
		public override string ToString() {
			return Text;
		}
	}

	/// <summary>
	/// The key side of a form field: the words referenced by a KEY block.
	/// </summary>
	public class FieldKey {
		/// <summary>
		/// Builds the key from the given block and child ids.
		/// </summary>
		/// <param name="block">The KEY block.</param>
		/// <param name="children">Ids of the child blocks that make up the key; may be null.</param>
		/// <param name="blocks">All known blocks, used to resolve the child ids.</param>
		public FieldKey(Block block, List<string> children, List<Block> blocks) {
			this.Block = block;
			this.Confidence = block.Confidence;
			this.Geometry = block.Geometry;
			this.Id = block.Id;
			this.Text = string.Empty;
			this.Content = new List<dynamic>();

			var parts = new List<string>();

			if(children != null) {
				foreach(var childId in children) {
					var child = blocks.Find(b => b.Id == childId);
					// Only resolved WORD children contribute to the key; an unresolved id
					// (Find returned null) is ignored rather than dereferenced.
					if(child != null && child.BlockType == "WORD") {
						var w = new Word(child, blocks);
						this.Content.Add(w);
						parts.Add(w.Text);
					}
				}
			}

			if(parts.Count > 0) {
				this.Text = string.Join(" ", parts);
			}

		}
		public List<dynamic> Content { get; set; }
		public Block Block { get; set; }
		public float Confidence { get; set; }
		public Geometry Geometry { get; set; }
		public string Id { get; set; }
		public string Text { get; set; }

		/// <summary>Returns the space-joined key text.</summary>
		public override string ToString() {
			return Text;
		}
	}

	/// <summary>
	/// A form field: a key and, when one could be resolved, its value.
	/// </summary>
	public class Field {
		/// <summary>
		/// Builds the field from a KEY block. The CHILD relationship supplies the key content;
		/// the VALUE relationship points at the block(s) holding the value content.
		/// </summary>
		/// <param name="block">The KEY block.</param>
		/// <param name="blocks">All known blocks, used to resolve relationship ids.</param>
		public Field(Block block, List<Block> blocks) {
			var relationships = block.Relationships;
			if(relationships == null) {
				return;
			}

			foreach(var relationship in relationships) {
				if(relationship.Type == "CHILD") {
					this.Key = new FieldKey(block, relationship.Ids, blocks);
				} else if(relationship.Type == "VALUE") {
					foreach(var id in relationship.Ids) {
						var valueBlock = blocks.Find(b => b.Id == id);
						// Skip ids that do not resolve to a known block, and blocks that are
						// not tagged as VALUE.
						if(valueBlock == null || valueBlock.EntityTypes == null || !valueBlock.EntityTypes.Contains("VALUE")) {
							continue;
						}
						var valueRelationships = valueBlock.Relationships;
						if(valueRelationships == null) {
							continue;
						}
						foreach(var valueRelationship in valueRelationships) {
							if(valueRelationship.Type == "CHILD") {
								this.Value = new FieldValue(valueBlock, valueRelationship.Ids, blocks);
							}
						}
					}
				}
			}
		}
		public FieldKey Key { get; set; }
		public FieldValue Value { get; set; }

		/// <summary>
		/// Returns a multi-line dump with the key and value text (empty when missing).
		/// </summary>
		public override string ToString() {
			var k = this.Key == null ? string.Empty : this.Key.ToString();
			var v = this.Value == null ? string.Empty : this.Value.ToString();
			return string.Format(@"
                {0}Field{0}===={0}
                Key: {1}, Value: {2}
            ", Environment.NewLine, k, v);
		}
	}

	/// <summary>
	/// Wrapper around a Textract CELL block: its position in the table and the words and
	/// selection elements it contains.
	/// </summary>
	public class Cell {
		/// <summary>
		/// Builds the cell and resolves its child content.
		/// </summary>
		/// <param name="block">The CELL block; must not be null.</param>
		/// <param name="blocks">All known blocks, used to resolve child ids.</param>
		public Cell(Block block, List<Block> blocks) {
			this.Block = block;
			this.ColumnIndex = block.ColumnIndex;
			this.ColumnSpan = block.ColumnSpan;
			this.Confidence = block.Confidence;
			this.Content = new List<dynamic>();
			this.Geometry = block.Geometry;
			this.Id = block.Id;
			this.RowIndex = block.RowIndex;
			this.RowSpan = block.RowSpan;
			this.Text = string.Empty;

			var relationships = block.Relationships;
			if(relationships != null) {
				foreach(var relationship in relationships) {
					if(relationship.Type == "CHILD") {
						foreach(var id in relationship.Ids) {
							var child = blocks.Find(b => b.Id == id);
							if(child == null) {
								// The id does not resolve to a known block; ignore it rather
								// than dereferencing null.
								continue;
							}
							if(child.BlockType == "WORD") {
								var w = new Word(child, blocks);
								this.Content.Add(w);
								// Each word is followed by a space (the text keeps a trailing space).
								this.Text = this.Text + w.Text + " ";
							} else if(child.BlockType == "SELECTION_ELEMENT") {
								var se = new SelectionElement(child, blocks);
								this.Content.Add(se);
								this.Text = this.Text + se.SelectionStatus + ", ";
							}
						}
					}
				}
			}
		}
		public int RowIndex { get; set; }
		public int RowSpan { get; set; }
		public int ColumnIndex { get; set; }
		public int ColumnSpan { get; set; }
		public List<dynamic> Content { get; set; }
		public Block Block { get; set; }
		public float Confidence { get; set; }
		public Geometry Geometry { get; set; }
		public string Id { get; set; }
		public string Text { get; set; }

		/// <summary>Returns the cell text.</summary>
		public override string ToString() {
			return this.Text;
		}
	}
}

================================================
FILE: src-csharp/appsettings.json
================================================
{
  "AWS": {
    "Profile": "default",
    "Region": "us-west-2"
  }
}


================================================
FILE: src-csharp/parser.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>netcoreapp2.2</TargetFramework>
  </PropertyGroup>

<ItemGroup>
    <PackageReference Include="AWSSDK.Extensions.NETCore.Setup" Version="3.3.100.1" />
    <PackageReference Include="AWSSDK.S3" Version="3.3.102.12" />
    <PackageReference Include="AWSSDK.Textract" Version="3.3.101.23" />
    <PackageReference Include="Microsoft.Extensions.Configuration" Version="2.2.0" />
    <PackageReference Include="Microsoft.Extensions.Configuration.EnvironmentVariables" Version="2.2.4" />
    <PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="2.2.0" />
    <PackageReference Include="System.Drawing.Common" Version="4.7.2" />
  </ItemGroup>

  <ItemGroup>
    <None Update="test-files\*">
      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
    </None>
    
  </ItemGroup>

</Project>


================================================
FILE: src-js/.eslintrc.js
================================================
// ESLint configuration: parses sources with the TypeScript parser and layers the
// ESLint, typescript-eslint and Prettier rule sets listed under `extends`.
module.exports = {
  parser: "@typescript-eslint/parser", // Specifies the ESLint parser
  parserOptions: {
    ecmaVersion: 2020, // Allows for the parsing of modern ECMAScript features
    sourceType: "module" // Allows for the use of imports
  },
  plugins: ["@typescript-eslint", "prettier"],
  extends: [
    // The three commented-out entries below are not active; they are kept for reference only.
    // "plugin:@typescript-eslint/recommended", // Uses the recommended rules from the @typescript-eslint/eslint-plugin
    // "prettier/@typescript-eslint", // Uses eslint-config-prettier to disable ESLint rules from @typescript-eslint/eslint-plugin that would conflict with prettier
    // "plugin:prettier/recommended", // Enables eslint-plugin-prettier and eslint-config-prettier. This will display prettier errors as ESLint errors. Make sure this is always the last configuration in the extends array.
    "eslint:recommended",
    "plugin:@typescript-eslint/eslint-recommended",
    "plugin:@typescript-eslint/recommended",
    "prettier",
  ],
  rules: {
    // Place to specify ESLint rules. Can be used to overwrite rules specified from the extended config
    // e.g. "@typescript-eslint/explicit-function-return-type": "off",
    // Severity 2 is ESLint's numeric form of "error": Prettier formatting differences fail the lint run.
    "prettier/prettier": 2,
  },
};


================================================
FILE: src-js/.nvmrc
================================================
lts/hydrogen


================================================
FILE: src-js/.prettierrc.js
================================================
// Prettier configuration: sets the line width Prettier wraps at to 110 characters.
module.exports = {
  printWidth: 110,
};


================================================
FILE: src-js/CHANGELOG.md
================================================
# Changelog

## 0.4.3 (2024-11-19)
### Changed
- Bumped dev dependencies (including `cross-spawn`, `lint-staged`, `rollup`) for dependabot/audit
### Fixed
- `.html()` no longer fails on empty pages with no content (proposed fix for [this AWS re:Post question](https://repost.aws/questions/QU68wHh5vLSkiDC9Vt4lXXsw))

## 0.4.2 (2024-06-28)
### Added
- Filter content by block type in a variety of contexts, with `includeBlockTypes` (allow-list) and `skipBlockTypes` (deny-list) options. These filters are available in the core `iter/listContent()`, `Layout.iter/listItems()` and `LayoutItem.iter/listLayoutChildren()` accessors, but can also be used to hide certain content (like page headers and footers) when you render with `.html({...})`. ([#179](https://github.com/aws-samples/amazon-textract-response-parser/issues/179))
- Low-level relationship traversal via `iter/listRelatedItemsByRelType()` is now supported from `Page`s (PAGE blocks)
- New accessor on `SelectionElement.isSelected`, in convenient boolean format (versus the 2-member `.selectionStatus` enumeration)
- Form `Field.isCheckbox` and `FieldValue.isCheckbox`, check if a K->V field corresponds to a (label)->(checkbox) pair. Also added `{Field/FieldValue}.isSelected` and `.selectionStatus`, which return `null` for non-'checkbox' fields. (Pre-work for [#183](https://github.com/aws-samples/amazon-textract-response-parser/issues/183))
### Changed
- `WithContent` mixin options refactored to more closely mirror `IBlockTypeFilterOpts`, because WithContent now aligns to `iter/listRelatedItemsByRelType()` under the hood. This will give us more fine-grained but standardised control of missing and unexpected non-content child block type handling, per item class... But means some warning/error behaviour when parsing Textract JSON might have shifted a little (hopefully for the better).
- A page's `Layout` no longer keeps any internal list-of-items state, instead referring to the parent `PAGE` block's child relationships directly.

## 0.4.1 (2024-06-04)
### Added
- `iter/listRelatedItemsByRelType()` utility methods on all host-linked block wrapper objects, as most common use-cases for `relatedBlockIdsByRelType()` were just to then fetch the parsed wrapper for the retrieved block ID. Hope to further standardise across `childBlockIds`, `relatedBlockIdsByRelType`, and these new methods in a future release - but this might require some breaking changes to drive consistency in the handling of invalid JSONs (with missing block IDs, etc).
- `iter/listLayoutChildren()` utility methods to generically traverse (nested?) child layout elements. We support generic & recursive access, but today the only known nesting is LAYOUT_LIST->LAYOUT_TEXT.
### Fixed
- `html()`, `str()` and `text` representations of page `Layout` no longer duplicate the content of `LAYOUT_TEXT` children under `LAYOUT_LIST` objects. ([#177](https://github.com/aws-samples/amazon-textract-response-parser/issues/177))
### Deprecated
- Page `Layout.nItems` is ambiguous: Prefer `.nItemsTotal` for previous behaviour (counting all direct and indirect children) or `.nItemsDirect` to count only top-level layout items, excluding those referenced as children by others.

## 0.4.0 (2024-02-06)
### Added
- Load and navigate [Amazon Textract Layout analysis](https://aws.amazon.com/blogs/machine-learning/amazon-textracts-new-layout-feature-introduces-efficiencies-in-general-purpose-and-generative-ai-document-processing-tasks/) data. ([#164](https://github.com/aws-samples/amazon-textract-response-parser/issues/164))
- Serialize individual elements, pages and documents to semantic markup with `.html()` (for page and document level, currently depends on `Layout` being enabled).
- Proper support for [table title and footer elements](https://aws.amazon.com/blogs/machine-learning/announcing-enhanced-table-extractions-with-amazon-textract/) (`TABLE_TITLE` and `TABLE_FOOTER`) linked from tables. ([#171](https://github.com/aws-samples/amazon-textract-response-parser/issues/171))
- Support [signature detection results](https://aws.amazon.com/blogs/machine-learning/detect-signatures-on-documents-or-images-using-the-signatures-feature-in-amazon-textract/) (`SIGNATURE` blocks)
- More complete exposure of Textract API model constructs and `base.ts` utility functions in external-facing TRP API
### Changed
- **(BREAKING)** Previously-exposed `CellBase` class is removed, due to refactoring `Cell` and `MergedCell` to depend more on composable mixins and less on fragile hierarchy of (now internal) `CellBaseGeneric`. Use `Cell | MergedCell` instead for typing.
- `Page`s now explicitly track parsed objects in their scope by block ID, which reduced state tracking requirements for other objects (like `Line`, `Query`) as we work toward supporting more edit/mutation operations. See `IBlockManager.registerParsedItem()` and `.getItemByBlockId()` for details. This may result in some **minor warning & error behavior changes** when handling invalid or incomplete Textract JSON.
- Split out `api-models/document` types to better align with library components, and made some minor typing updates.
### Fixed
- `Table.nCells` now correctly reflects merged cells (instead of just counting all sub-cells).
- Support alternative `KEY` and `VALUE` blocks for Forms K-V data, observed in place of the typical `KEY_VALUE_SET` blocks for some test data files (Was this a temporary API issue? A change going forward? 🤷‍♂️)
### Deprecated
- `ApiBlockWrapper` base class is now slated to become internal-only: Please let us know if you have use-cases
- Various re-exports from `/api-models/document` sub-module: Prefer importing direct from top-level
- `ApiAsyncJobOuputInProgress` typo superseded by `ApiAsyncJobOutputInProgress`, but original not yet fully removed

## 0.3.1 (2023-08-28)
### Fixed
- Suppress "content may be truncated" warnings when API `NextToken` is present but `null` ([#154](https://github.com/aws-samples/amazon-textract-response-parser/issues/154))
- Fix typed `TABLE_FOOTER` and `TABLE_SECTION_HEADER` EntityType values to match the [API doc](https://docs.aws.amazon.com/textract/latest/dg/API_Block.html) ([#158](https://github.com/aws-samples/amazon-textract-response-parser/issues/158))

## 0.3.0 (2023-07-31)
### Added
- **(BREAKING)** `ignoreMerged` and `repeatMultiRowCells` options on `Table` methods are now wrapped into `opts` objects for better future extensibility and clearer user code.
- Expose the `ignoreMerged` option through `Table.rowAt()`, `Table.iterRows()`, and `Table.listRows()`, to enable navigating table rows ignoring merged cells.
- Page-level access to [Amazon Textract Queries](https://docs.aws.amazon.com/textract/latest/dg/queryresponse.html) results. (Still assessing compositing architecture for a unified document-level view in future) ([#80](https://github.com/aws-samples/amazon-textract-response-parser/issues/80))
- Average OCR (text recognition) confidence is now available on form fields (and their keys and values) as well as tables, table rows, and table cells - via `getOcrConfidence()`, with configurable aggregation method (including minimum, mean, etc.).
- `EntityTypes` for tables and table cells/merged-cells are now accessible through `Table.tableType` property and `Cell.hasEntityTypes()` function - and also added to the underlying API data types. ([#78](https://github.com/aws-samples/amazon-textract-response-parser/issues/78))
### Changed
- **(BREAKING)** UMD module output `dist/umd` removed, following deprecation at v0.2.0 and no requests from users to restore it.
### Fixed
- Corrected wrongly typed `ApiCellBlock.Relationships` from an array of `ApiChildRelationship` to an optional array of same: This field may be omitted altogether when a cell is detected but has no content.
- Corrected wrongly typed `ApiKeyValueSetBlock.EntityTypes` data model from `ApiKeyValueEntityType` to an array of same.

## 0.2.2 (2023-06-19)
### Fixed
- Removed `browser` field from package.json because front end bundlers like webpack use it, and the (IIFE `dist/browser`) build it pointed to was not appropriate for these build systems. Added `jsdelivr` field in its place to help ensure direct-to-browser CDN imports continue to consume the IIFE build by default. ([Issue #139](https://github.com/aws-samples/amazon-textract-response-parser/issues/139))

## 0.2.1 (2023-05-22)
### Fixed
- `.geometry` on Expense result fields is now optional, as the underlying field may not be returned by Amazon Textract in some cases. Typings updated to reflect the fix. ([Issue #102](https://github.com/aws-samples/amazon-textract-response-parser/issues/102))

## 0.2.0 (2022-04-28)
### Added
- Initial support for Amazon Textract [identity document APIs](https://docs.aws.amazon.com/textract/latest/dg/how-it-works-identity.html).
- Document-level Form field access and querying via `TextractDocument.form` in addition to `Page.form`.
- `Page.pageNumber` to find and return 1-based index of the current page in the parent document.
- New ES (esnext) module output in `dist/es` and `module` hint in package.json to encourage compatible tools to use this output.
### Changed
- Use CommonJS `dist/cjs` as default NPM module format instead of previous UMD `dist/umd`.
- Separate type declarations into `dist/types` to reduce duplication and build size.
- Use new [merged table cells](https://aws.amazon.com/about-aws/whats-new/2022/03/amazon-textract-updates-tables-check-detection/) feature by default, rather than classic split cells.
- Eliminate trailing whitespace previously automatically added to Cell.text
### Deprecated
- UMD module output `dist/umd` slated to be removed in a future version: Please let us know if the other format options don't work for you!

## 0.1.2 (2021-12-16)
### Added
- Header and footer segmentation utility (by text `LINE`)
### Changed
- Significantly improved `inReadingOrder` results for multi-column documents.


================================================
FILE: src-js/README.md
================================================
# Textract Response Parser for JavaScript/TypeScript

This library loads [Amazon Textract](https://docs.aws.amazon.com/textract/latest/dg/what-is.html) API response JSONs into structured classes with helper methods, for easier post-processing.

It's designed to work in both NodeJS and browser environments, and to support projects in either JavaScript or TypeScript.

> ⚠️ **Warning:** If you're migrating from another TRP implementation such as the [Textract Response Parser for Python](https://github.com/aws-samples/amazon-textract-response-parser/tree/master/src-python), please note that the APIs and available features may be substantially different. Please let us know if there's a feature you're missing!


## Installation

You can use TRP in your JavaScript or TypeScript NPM projects:

```sh
$ npm install amazon-textract-response-parser
```

```js
// With CommonJS-style require:
const { TextractDocument, TextractIdentity } = require("amazon-textract-response-parser");
// Or ES-style module imports:
import { TextractDocument, TextractExpense } from "amazon-textract-response-parser";
```

...Or link directly in the browser - for example via a CDN like [unpkg](https://unpkg.com/):

```html
<script src="https://unpkg.com/amazon-textract-response-parser@x.y.z"></script>

<script>
  // Use the main parser classes:
  var doc = new trp.TextractDocument(...);
  // Or other exported utility functions/classes/enums/etc:
  var avg = trp.aggregate([1, 2, 3], trp.AggregationMethod.Mean);
</script>
```

To enable this, the distribution of this library provides multiple builds:

- `dist/cjs` (default `main`), for CommonJS environments like NodeJS - including most front end applications built with tools like React and Webpack.
- `dist/es` (default `module`), for ES6/ES2015/esnext capable environments.
- `dist/browser` (default `jsdelivr` and `unpkg`), for linking directly from browser HTML with no module framework (IIFE).

This means that **deep imports** will depend on your build environment, but are generally discouraged anyway and may not work correctly with TypeScript. Check out the [examples/](examples/README.md) folder on GitHub for some basic starters using the different styles.


## Loading data

Initialize a `TextractDocument` (or `TextractExpense`, `TextractIdentity`) by providing the parsed response JSON object from the corresponding [Amazon Textract APIs](https://docs.aws.amazon.com/textract/latest/dg/API_Reference.html) such as [GetDocumentAnalysis](https://docs.aws.amazon.com/textract/latest/dg/API_GetDocumentAnalysis.html), [AnalyzeID](https://docs.aws.amazon.com/textract/latest/dg/API_AnalyzeID.html), or [AnalyzeExpense](https://docs.aws.amazon.com/textract/latest/dg/API_AnalyzeExpense.html). In most cases, providing an **array** of response objects is also supported (for use when a large Amazon Textract response was split/paginated).

For example, loading a response JSON from file in NodeJS:

```js
fs.readFile("./my-analyze-document-response.json", (err, resBuffer) => {
  if (err) throw err;
  const doc = new TextractDocument(JSON.parse(resBuffer));
  // ...
});
```

If you're using TypeScript, you may need to **typecast** your input JSON while loading it.

> The `ApiResponsePage` input interface exposed and expected by this module is more constrained than - but functionally compatible with - the output types produced by the [AWS SDK for JavaScript Textract Client](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-textract/index.html).

```typescript
import { ApiAnalyzeExpenseResponse, TextractExpense } from "amazon-textract-response-parser";
import { TextractClient, AnalyzeExpenseCommand } from "@aws-sdk/client-textract";
const textract = new TextractClient({});

async function main() {
  const textractResponse = await textract.send(
    new AnalyzeExpenseCommand({
      Document: { Bytes: await fs.readFile("...") },
    })
  );
  const expense = new TextractExpense((textractResponse as unknown) as ApiAnalyzeExpenseResponse);
}
```

With your data loaded in to a TRP `TextractDocument` or similar, you're ready to take advantage of the higher-level TRP.js functions to navigate and analyze the result.


## Generic document text navigation

In general, this library avoids directly exposing **arrays** in results (see the *Mutation operations* section below). Instead, you can use:

- `.n***` properties to count items
- `.list***()` functions to return a copy of the underlying array
- `.iter***()` functions to iterate through collections, or
- `.***At***()` functions to fetch a specific item from a collection

For example:

```typescript
// Navigate the document hierarchy:
console.log(`Opened doc with ${doc.nPages} pages`);
console.log(
  `The first word of the first line is ${doc.pageNumber(1).lineAtIndex(0).wordAtIndex(0).text}`
);

// Iterate through content:
for (const page of doc.iterPages()) {
  // (In Textract's output order...)
  for (const line of page.iterLines()) {
    for (const word of line.iterWords()) {
      console.log(word.text);
    }
  }
}

// ...Or get snapshot arrays instead of iterators, if you need:
const linesArrsByPage = doc.listPages().map((p) => p.listLines());
```

These arrays are in the raw order returned by Amazon Textract, which is not necessarily a logical human reading order - especially for multi-column documents. See the *Layout analysis* and *List text in approximate reading order* sections below for extra content sorting utilities.


## Queries

The results of [Amazon Textract Queries](https://docs.aws.amazon.com/textract/latest/dg/queryresponse.html) are accessible at the page level under `page.queries`. You can `get*` a query by exact question text or alias, or `search*` them by case-insensitive substrings:

```typescript
doc.listPages().forEach((page) => {
  // Log a quick human-readable overview of queries & answers:
  console.log(page.queries.str());

  // Get a query (and its top result's text) by exact alias:
  const customer = page.queries.getQueryByAlias("customer_name")?.topResult?.text;

  // Get possible results of a query from most to least confident:
  const shippingAddrCandidates =
    page.queries.getQueryByAlias("shipping_addr")?.listResultsByConfidence() || [];
  const shippingAddrTopConf = shippingAddrCandidates[0].confidence;

  // Searching matches queries e.g. 'What is the Shipping Address?', 'FIND THE BILLING ADDRESS', etc
  const addrQueries = page.queries.searchQueriesByQuestion("address");
});
```


## Forms (Key-Value pairs)

As well as looping through the [form data key-value pairs](https://docs.aws.amazon.com/textract/latest/dg/how-it-works-kvp.html) in the document, you can query fields by key:

```typescript
console.log(doc.form.nFields);
const fields = doc.form.listFields();

// Exact match:
const addr = doc.form.getFieldByKey("Address").value?.text;

// Search key containing (case-insensitive):
const addresses = doc.form.searchFieldsByKey("address");
addresses.forEach((addrField) => { console.log(addrField.key.text); });
```

Note that the `Field.confidence`, `FieldKey.confidence` and `FieldValue.confidence` scores reflect confidence of the **key-value structure detection** model. For aggregated OCR confidence of their **actual text**, use `.getOcrConfidence()` instead.

You can also search form keys at the individual page level, or look up the page number for detected fields:

```typescript
const fieldByDoc = doc.form.getFieldByKey("Address");
console.log(`Detected Address on page ${fieldByDoc.parentPage.pageNumber}`);

const page = doc.pageNumber(1);
const fieldByPage = page.form.getFieldByKey("Address");
```

`field.isCheckbox` is true for fields whose value contains exactly one SelectionElement object: meaning they're a (key=label)->(value=checkbox/radio) pair. For these fields, you can directly use `field.selectionStatus` or `field.isSelected` to look up the value's status. For other (non-checkbox) fields, they'll return `null`.


## Tables

This library's table navigation tools address **[merged cells](https://docs.aws.amazon.com/textract/latest/dg/how-it-works-tables.html) by default**, for convenience.

```typescript
console.log(page.nTables);
const table = page.tableAtIndex(0);

// Index cells by row, column, or both:
const headerStrs = table.cellsAt(1, null)?.map(cell => cell.text);
const firstColCells = table.cellsAt(null, 1);
const targetCell = table.cellAt(2, 4);

// Iterate over rows/cells:
for (const row of table.iterRows()) {
  for (const cell of row.iterCells()) {
    console.log(cell.text);
  }
}
```

Further configuration arguments can be used to change the treatment of merged cells if needed:

```typescript
// Iterate over rows repeating any cells spanning multiple rows:
for (const row of table.iterRows({repeatMultiRowCells: true})) {}

// Return split sub-cells instead of merged cells when indexing:
const firstColCellFragments = table.cellsAt(null, 1, {ignoreMerged: true});
```

The `Table.confidence`, `Row.getConfidence()` and `Cell.confidence` scores reflect confidence of the **table structure detection** model. For aggregated OCR confidence of the text contained inside, use `.getOcrConfidence()` instead.

Use `Table.tableType` and `Cell.hasEntityTypes()` to explore the more advanced [entity types](https://docs.aws.amazon.com/textract/latest/dg/how-it-works-tables.html) extracted by Amazon Textract: For example column headers, title cells, footer cells, and summary cells:

```typescript
import { ApiTableCellEntityType, ApiTableEntityType } from "amazon-textract-response-parser";

const isSemiStruct = table.tableType === ApiTableEntityType.SemiStructuredTable;
const colHeaders = table.rowAt(1).listCells()
  .filter((c) => c.hasEntityTypes(ApiTableCellEntityType.ColumnHeader));
```

For [overall table-level title and footer captions](https://aws.amazon.com/blogs/machine-learning/announcing-enhanced-table-extractions-with-amazon-textract/), see `table.listTitles()` and `table.listFooters()`, etc.


## Layout analysis

[Layout analysis in Amazon Textract](https://aws.amazon.com/blogs/machine-learning/amazon-textracts-new-layout-feature-introduces-efficiencies-in-general-purpose-and-generative-ai-document-processing-tasks/) detects higher-level semantic components than the core text Lines & Words - like paragraphs and headings. If you enabled this analysis, you can access the results through the `page.layout` collection:

```typescript
// Loop through content in implied reading order (from Layout API):
page.layout.listItems().forEach((layItem) => {
  console.log(layItem.blockType);  // There are different kinds of Layout Item
  const textLines = layItem.listTextLines();  // All Layout* items can be queried for text LINEs
  const children = layItem.listContent();  // Usually text LINEs, but sometimes other Layout* items
  console.log(layItem.text + "\n");  // ...Or you can just pull up the text
});

// Filtering by content type is also supported:
for (const layItem of page.layout.listItems({
  skipBlockTypes: [
    ApiBlockType.LayoutHeader, ApiBlockType.LayoutFooter, ApiBlockType.LayoutPageNumber
  ],
})) {
  console.log(layItem.text);
}
```

If Forms and/or Tables analyses were also enabled, you'll be able to traverse from the relevant Layout object types to these more detailed representations. **However,** because these are separate analyses the correspondence may not be 1-to-1 and TRP is having to do some reconciliation under the hood:

```typescript
import { ApiBlockType, LayoutKeyValue, LayoutTable } from "amazon-textract-response-parser";

page.layout.listItems().forEach((layItem) => {
  if (layItem.blockType === ApiBlockType.LayoutKeyValue) {
    const fields = (layItem as LayoutKeyValue).listFields(); // Probably multiple
    fields.forEach((field) => console.log(field.key.text));
  } else if (layItem.blockType === ApiBlockType.LayoutTable) {
    const tables = (layItem as LayoutTable).listTables(); // Probably just 1
    tables.forEach((table) => console.log(table.nCells));
  }
});
```


### List text in approximate reading order (with or without `Layout`)

Particularly for multi-column documents, the default output sequence for Amazon Textract `LINE`/`WORD` OCR results will likely not be the overall reading order you'd like. For best performance, enable and use the `Layout` analysis because **layout items are returned in implied reading order** as estimated by the AI service.

Alternatively, TRP.js provides a **client-side heuristic algorithm** that can attempt to sort results without Layout. There are even some configuration parameters exposed to help you tune the results for your particular domain, and test harnesses in the [test/unit/corpus folder](test/unit/corpus) to help you experiment via `npm run test:unit`:

```typescript
import { ReadingOrderLayoutMode } from "amazon-textract-response-parser";

// By default, we automatically use `Layout` when it's available and heuristics when it's not:
let textInReadingOrder: string = page.getTextInReadingOrder();  // Just generate text
let pseudoParas = page.getLineClustersInReadingOrder();

// You can force use of `Layout` (throwing an error if none available):
let layText = page.getTextInReadingOrder({ useLayout: ReadingOrderLayoutMode.RequireLayout });
// Or fine-tune heuristic parameters:
let layParas = page.getLineClustersInReadingOrder({
  colHOverlapThresh: 0.75,
  paraVDistTol: 0.8,
  // ...
  useLayout: ReadingOrderLayoutMode.IgnoreLayout,
});

// Lines are clustered by "paragraph"/layout element:
for (const pseudoParagraph of pseudoParas) {
  for (const line of pseudoParagraph) {
    console.log(line.text);
  }
  console.log();  // Print a gap between "paragraphs"
}
```

When configured to use Layout analysis results, these functions should be equivalent to just looping through your `page.layout.iterItems()` to get the text from each one in order.


### Render documents to semantic markup/markdown

If you'd like to use AI/ML models to further post-process your Amazon Textract results, you have a choice between those that take text-only inputs - and "multi-modal" models that can also ingest structural information (see for example [this Amazon Comprehend feature](https://aws.amazon.com/about-aws/whats-new/2021/09/amazon-comprehend-extract-entities-native-format/) and [this Amazon SageMaker sample](https://github.com/aws-samples/amazon-textract-transformer-pipeline/tree/main)). While multi-modal models may work best on complex structured documents, the pace of research on text-only Large Language Models has historically been faster (perhaps because plain text data is easier to come by and work with).

**Semantic markup like HTML** provides somewhat of a middle ground where we can try to preserve the layout/form/table/etc structure Amazon Textract extracted, but still provide plain text. This may be particularly useful for working with **Generative Large Language Models** (GenAI/LLMs) like those on [Amazon Bedrock](https://aws.amazon.com/bedrock/).

```typescript
// Render HTML for individual components:
console.log(page.listTables()[0].html());

// ...Or for whole pages/documents:
const docHtml = doc.html();
fs.writeFile("./my-doc.html", docHtml, (err) => {});
```

Some caveats to be aware of:

- Top-level `Page.html()` and `TextractDocument.html()` currently depend on Layout analysis being enabled, because the Layout results are used to sequence all the elements together.
- Only HTML is supported currently, but we're keen to add `.markdown()` if there's interest

You can also **filter out** types of content you don't want to include in your HTML. 

```typescript
// Most commonly, you'll `skip` high-level layout elements like `LayoutHeader`:
const docHtml = doc.html({
  skipBlockTypes: [
    ApiBlockType.LayoutHeader, ApiBlockType.LayoutFooter, ApiBlockType.LayoutPageNumber
  ],
});

// Skipping lower-level blocks is also possible, but can produce weird results:
const docHtmlNoCellsOrSelectors = doc.html({
  skipBlockTypes: [ApiBlockType.Cell, ApiBlockType.SelectionElement],
});

// Allow-listing is also possible, but you should include *everything* relevant:
const docTablesHtml = doc.html({
  includeBlockTypes: [
    ApiBlockType.Page,
    ApiBlockType.LayoutTable,
    ApiBlockType.Table,
    ApiBlockType.Cell,
    ApiBlockType.SelectionElement,
    ApiBlockType.Word,
  ],
});
```

If you have feedback about these features, please let us know in the GitHub issues to help prioritise!


### Segment headers and footers from main content

This is another task for which you might find [Textract Layout analysis](https://docs.aws.amazon.com/textract/latest/dg/layoutresponse.html) useful - by looping through layout items and filtering out those of type `LayoutHeader`, `LayoutFooter` and `LayoutPageNumber`.

However, TRP.js also provides a heuristic function you can try instead:

```typescript
const segmented = page.getLinesByLayoutArea(
  true  // (Also try to sort lines in reading order)
);

console.log("---- HEADER:")
console.log(segmented.header.map((l) => l.text).join("\n"));
console.log("\n---- CONTENT:")
console.log(segmented.content.map((l) => l.text).join("\n"));
console.log("\n---- FOOTER:")
console.log(segmented.footer.map((l) => l.text).join("\n"));
```

**Note:** Unlike the `*InReadingOrder` APIs, this utility has not yet been updated to use Textract Layout analysis when it's available. That behavior might change in future.


### Calculate average skew of page text

Calculating the overall skew of a page can be useful for validation checks: For example to detect and reject a strongly skewed image which might degrade the accuracy of tables, forms, or other downstream analyses.

```typescript
// Check the average angle/skew of detected text:
const skew = page.getModalWordOrientationDegrees();
```

This method aggregates the skew to find the most common angle across all content on the page.


## Signatures

If you enabled [signature detection in Amazon Textract](https://aws.amazon.com/blogs/machine-learning/detect-signatures-on-documents-or-images-using-the-signatures-feature-in-amazon-textract/), you can check for signatures at the page level:

```typescript
// e.g. print number of signatures detected by page:
doc.listPages()
      .forEach((page, ix) => { console.log(`${page.nSignatures} signatures on page ${ix+1}`); });
// ...Or get the position of the first signature on the first page:
const bbox = doc.pageNumber(1).listSignatures()[0].geometry.boundingBox;
```


## Expense (invoice and receipt) objects

Since the format of responses for Amazon Textract's [Expense results](https://docs.aws.amazon.com/textract/latest/dg/expensedocuments.html) is very different from the [general document analysis APIs](https://docs.aws.amazon.com/textract/latest/dg/how-it-works-document-layout.html), you can use the separate `TextractExpense` class in this library to process these.

```typescript
const expense = new TextractExpense(textractResponse);

// Iterate through content:
console.log(`Found ${expense.nDocs} expense docs in file`);
const expenseDoc = [...expense.iterDocs()][0];
for (const group of expenseDoc.iterLineItemGroups()) {
  for (const item of group.iterLineItems()) {
    console.log(`Found line item with ${item.nFields} fields`);
    for (const field of item.iterFields()) {
      ...
    }
  }
}

// Get snapshot arrays instead of iterators, if you need:
const summaryFieldsArrByDoc = expense.listDocs().map((doc) => doc.listSummaryFields());
const linesArrsByPage = doc.listPages().map((p) => p.listLines())

// Retrieve item fields by their tagged 'type':
const vendorNameFields = expenseDoc.searchSummaryFieldsByType("VENDOR_NAME");
console.log(`Found ${vendorNameFields.length} vendor name fields in doc summary`);
console.log(vendorNameFields[0].fieldType.text); // "VENDOR_NAME"
console.log(vendorNameFields[0].value.text); // e.g. "Amazon.com"
```


## Identity document objects

Similarly to expenses mentioned above, Amazon Textract offers specific APIs for [identity document analysis](https://docs.aws.amazon.com/textract/latest/dg/how-it-works-identity.html). You can use the separate `TextractIdentity` class in this library to process these.

```typescript
import { ApiAnalyzeIdResponse, TextractIdentity } from "amazon-textract-response-parser";
import { TextractClient, AnalyzeIDCommand } from "@aws-sdk/client-textract";
const textract = new TextractClient({});

async function main() {
  const textractResponse = await textract.send(
    new AnalyzeIDCommand({
      Document: { Bytes: await fs.readFile("...") },
    })
  );
  const identity = new TextractIdentity((textractResponse as unknown) as ApiAnalyzeIdResponse);
}
```

The library implements some enumerations of known values (for field types, ID types, and so on) to make processing AnalyzeID responses a little simpler:

```typescript
import { IdDocumentType, IdFieldType } from "amazon-textract-response-parser";

const idDoc = identity.getDocAtIndex(0); // (Or iterate, list docs in a result)

if (idDoc.idType === IdDocumentType.Passport) {
  // Fetch fields by known type:
  const passNumField = idDoc.getFieldByType(IdFieldType.DocumentNumber);
  console.log(
    `Passport number ${passNumField.value}, confidence ${passNumField.valueConfidence}%`
  );

} else if (idDoc.idType === IdDocumentType.DrivingLicense) {
  // ...Or list or iterate the document's fields:
  for (const field of idDoc.iterFields()) {
    console.log(`${field.fieldTypeRaw}: ${field.valueRaw}`);
  }

} else {
  // Produce human-readable representations of fields, documents, or whole responses:
  console.log(idDoc.str());
}
```


## Mutation operations

Easier analysis and querying of Textract results is useful, but what if you want to augment or edit your Textract JSONs with JS/TS Textract Response Parser?

In general:

- Where the library classes (`TextractDocument`, `Page`, `Word`, etc) offer mutation operations, these should modify the source API JSON object **in-place** and ensure self-consistency.
- For library classes that are backed by a specific object in the source API JSON, you can access it via the `.dict` property (`word.dict`, `table.dict`, etc) but then *you're* responsible for updating any required references in other objects if making changes there.
- Any individual-block-level changes you make to the underlying API JSON should be dynamically reflected in the parsed TRP objects (e.g. overriding word text, coordinates, etc)... But changes that affect inter-block relationships are more likely to cause staleness issues.

In particular for **array properties**, you'll note that TRP generally exposes getters and iterators (such as `table.nRows`, `table.iterRows()`, `table.listRows()`, `table.cellsAt()`) rather than direct access to lists - to avoid implying that arbitrary array mutations (such as `table.rows.pop()`) are properly supported.


## Other features and examples

For more examples on how to use the library, you can refer to the (basic) [examples](examples/) and (more complete) [test](test/) folders on GitHub, and the source code itself. If you have suggestions for additional features that would be useful, please open a GitHub issue!


## Development

The integration tests for this library validate the end-to-end toolchain for calling Amazon Textract and parsing the result, so note that to run the full `npm run test` command:

1. Your environment will need to be configured with a login to AWS (e.g. via the [AWS CLI](https://aws.amazon.com/cli/))
2. Billable API requests may be made

You can alternatively run just the local/unit tests via `npm run test:unit`.


================================================
FILE: src-js/bin/reading-order-diagnostic.js
================================================
/**
 * Basic script to extract and save reading-order text from Amazon Textract JSONs.
 *
 * This script uses the built NodeJS library, so check your build is up-to-date by first running
 * `npm run build`! JSON files are read from IN_FOLDER, parsed with the TRP, and reading-order text
 * files output to the OUT_FOLDER. This can be a helpful tool for debugging issues with (or writing
 * corpus tests for) the 'inReadingOrder' functions.
 */
/* eslint-disable no-undef */
/* eslint-disable @typescript-eslint/no-var-requires */

// Node Built-Ins:
const fs = require("fs");
const path = require("path");

// Local Dependencies:
const { TextractDocument } = require("../dist/cjs");

// Folder of Amazon Textract response JSONs to read, and folder to write diagnostics into:
const IN_FOLDER = "test/data/corpus";
const OUT_FOLDER = "test/data/corpus-readingorder";

// Make sure the output folder exists before any file is written:
if (!fs.existsSync(OUT_FOLDER)) {
  fs.mkdirSync(OUT_FOLDER, { recursive: true });
}

for (const file of fs.readdirSync(IN_FOLDER)) {
  // Anything that cannot be read and parsed as JSON is reported and skipped:
  let response;
  try {
    const rawContent = fs.readFileSync(path.join(IN_FOLDER, file));
    response = JSON.parse(rawContent);
  } catch (err) {
    console.error(`Skipping ${file} - doesn't look like valid JSON`);
    continue;
  }

  const doc = new TextractDocument(response);

  // One text section per page: a 3-line banner followed by that page's reading-order text.
  const pageTexts = doc.listPages().map((page, ixPage) => {
    const section = [
      "------------------------------------------------",
      `PAGE ${ixPage + 1}`,
      "------------------------------------------------",
      page.getTextInReadingOrder(),
    ];
    return section.join("\n");
  });

  // Output file names are based on the part of the input file name before its first ".":
  const outFileRoot = path.join(OUT_FOLDER, file.split(".")[0]);
  fs.writeFileSync(`${outFileRoot}.readingorder.txt`, pageTexts.join("\n\n\n"));

  // Nested text arrays per page: columns -> line clusters -> line texts.
  const clusterTextsByPage = doc.listPages().map((page) => {
    const columns = page._getLineClustersByColumn();
    return columns.map((col) => col.map((cluster) => cluster.map((line) => line.text)));
  });
  fs.writeFileSync(`${outFileRoot}.readingorder.json`, JSON.stringify(clusterTextsByPage, null, 2));

  console.log(`Done ${file}`);
}
console.log("All done!");


================================================
FILE: src-js/examples/README.md
================================================
# Examples for TRP.js

This folder contains example projects using the Amazon Textract Response Parser for JavaScript/TypeScript from various different build environments, to help you get started.

> ⚠️ **Note:** While all of the example projects reference local API response JSON files, some also make Amazon Textract API calls by default - so running them may incur (typically very small) charges. See [Amazon Textract Pricing](https://aws.amazon.com/textract/pricing/) for details.


## Pre-requisites for running the examples


### Local builds of TRP.js

The projects use the **local build** of the library for pre-publication testing, so you'll need to run `npm run build` in the parent `src-js` folder before they'll work.

To instead switch to published TRP.js versions (if you're using an example as a skeleton for your own project):

- For NodeJS projects, replace the package.json relative path in `"amazon-textract-response-parser": "file:../.."` with a normal version spec like `"amazon-textract-response-parser": "^0.4.3"`, and re-run `npm install`
- For browser IIFE projects, edit the `<script>` tag in the HTML to point to your chosen CDN or downloaded `trp.min.js` location


### API credentials for Amazon Textract

For the example projects that demonstrate actual integration with Amazon Textract, we create a [TextractClient](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/client/textract/) with empty [configuration](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/Package/-aws-sdk-client-textract/TypeAlias/TextractClientConfigType/). This assumes that your AWS IAM **credentials** and default **region** are pre-configured for access through e.g. environment variables.

If you're new to setting up AWS credentials for CLI and SDK access in general, refer to the credentials guidance in the [AWS SDK for JavaScript (v3) Developer Guide](https://docs.aws.amazon.com/sdk-for-javascript/v3/developer-guide/setting-credentials.html) and/or the [AWS CLI user guide](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html).


## Working with multi-page documents or many documents at once

The ['synchronous' request/response APIs](https://docs.aws.amazon.com/textract/latest/dg/sync.html) used in these examples generally only support images or single-page documents. Multi-page documents will need to use [Asynchronous Textract APIs](https://docs.aws.amazon.com/textract/latest/dg/async.html) instead. Since Asynchronous APIs like [StartDocumentAnalysis](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/client/textract/command/StartDocumentAnalysisCommand/) return a **job ID** rather than an immediate result, applications will need to wait and then call [GetDocumentAnalysis](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/client/textract/command/GetDocumentAnalysisCommand/) to retrieve the result once it's ready. You'll also need to **upload the source document to Amazon S3** rather than passing it directly in the API request.

Furthermore, Amazon Textract applies [quota limits](https://docs.aws.amazon.com/textract/latest/dg/limits-quotas-explained.html) on these APIs.

As a result, applications processing multi-page documents will generally need to orchestrate uploading the source file to S3; starting the analysis job; and resuming the processing flow once [notified via Amazon SNS](https://docs.aws.amazon.com/textract/latest/dg/async-notification-payload.html) that the analysis is ready (which is much more quota-efficient than polling the `GetDocumentAnalysis` API)... Particularly spiky workflows (where many documents are submitted at once) may also want to implement queuing to manage inbound request rates.

A full end-to-end solution for this involves deploying cloud infrastructure like AWS Lambda functions and Amazon SNS topics, so is outside the scope of these TRP samples. Instead, refer to:

- [Amazon Textract IDP CDK Constructs](https://github.com/aws-samples/amazon-textract-idp-cdk-constructs) for composable, deployable solution components written in [AWS CDK](https://aws.amazon.com/cdk/).
- [Amazon Textract Textractor](https://github.com/aws-samples/amazon-textract-textractor/) which mainly provides Python bindings, but also a [handy CLI](https://aws-samples.github.io/amazon-textract-textractor/commandline.html) for processing a batch of documents for a quick PoC.
- Other code samples listed in the [Amazon Textract Developer Guide](https://docs.aws.amazon.com/textract/latest/dg/service_code_examples.html).


================================================
FILE: src-js/examples/browser-iife/main.html
================================================
<!DOCTYPE html>
<html lang="en">
  <head>
    <!-- Declare the document encoding and a title, so the page is a complete HTML document: -->
    <meta charset="utf-8" />
    <title>TRP.js browser IIFE example</title>
    <!--
      Add a <script> tag to your page to load the IIFE version of TRP.js

      In this example we use the local development version of the library via the dist/ folder
      (which means you'll need to have run `npm run build`!).
    -->
    <script src="../../dist/browser/trp.min.js"></script>
    <!--
      To use the published versions of the library through a CDN like jsDelivr or UNPKG, you might
      set up your tag something like the below.

        <script
          src="https://cdn.jsdelivr.net/npm/amazon-textract-response-parser@0.4.3"
          integrity="sha384-alwjvV+x6bUphISfuDH4sYFrZGG8wUyerOE69EjRDcgkKbLNzYg4lWvDYp+L+7W9"
          crossorigin="anonymous"
        ></script>

      or:

        <script
          src="https://unpkg.com/amazon-textract-response-parser@0.4.3"
          integrity="sha384-alwjvV+x6bUphISfuDH4sYFrZGG8wUyerOE69EjRDcgkKbLNzYg4lWvDYp+L+7W9"
          crossorigin="anonymous"
        ></script>

      These CDNs should resolve `/dist/browser/trp.min.js` as the default file automatically, but
      you can specify it explicitly at the end of the URL if you prefer. You could check the
      expected integrity hash via a tool like https://www.srihash.org/
    -->
  </head>
  <body>
    <!--
      After loading the TRP script above, the global `window.trp` object should be set for your
      code to use. In this example we've put the example JavaScript in main.js - but you could
      inline it to the <script> tag in this HTML if you prefer:
    -->
    <script src="./main.js"></script>
    <!--
      Browser JavaScript generally can't access the local filesystem for security reasons, so this
      example uses a file upload control which the test runner will automatically operate:
    -->
    <h1>Select an Amazon Textract API response JSON file to analyze</h1>
    <input id="textractJson" type="file" onchange="onFileUpload(this)"/>
  </body>
</html>


================================================
FILE: src-js/examples/browser-iife/main.js
================================================
/**
 * Browser Javascript for main.html to demonstrate using TRP.js via IIFE <script> tag.
 *
 * This script assumes the TRP.js IIFE bundle has already been included (e.g. via another <script>
 * tag), to define the global `trp` object.
 *
 * Basic initial tests run as soon as the script is executed, but we can only demonstrate loading
 * a Textract JSON once the HTML file input is updated, which will trigger `onFileUpload()`.
 */

// Set a global flag which will be polled by Puppeteer to determine when the tests have succeeded:
// (It is only flipped to `true` at the very end of `parseDocResult()`)
window.fileProcessed = false;

// Run initial tests that don't require the Textract JSON:
// NOTE: Reading a single property from `trp` only throws if `trp` itself is not defined - an
// export that's missing from the bundle would just yield `undefined` for these constants. Only the
// two-level lookups (`trp.ApiRelationshipType.Child` and `trp.AggregationMethod.Mean`) would throw
// if their parent export was missing.
console.log("Checking some expected components are accessible through IIFE...");
const CHILD = trp.ApiRelationshipType.Child;
const ApiTextType = trp.ApiTextType;
const ApiKeyValueEntityTypeEnum = trp.ApiKeyValueEntityType;
const ApiJobStatus = trp.ApiJobStatus;
const ApiTableEntityType = trp.ApiTableEntityType;
const aggregate = trp.aggregate;
const Word = trp.Word;
const TexDoc = trp.TextractDocument;
const TexExp = trp.TextractExpense;
const FormGeneric = trp.FormGeneric;
const Geometry = trp.Geometry;
const TexId = trp.TextractIdentity;
const QueryResultGeneric = trp.QueryResultGeneric;
const TableGeneric = trp.TableGeneric;
// Unlike the lookups above, this actually executes library code and checks the returned value:
console.log("Testing aggregate() utility function...");
const aggTest = aggregate([1, 2, 3, 4], trp.AggregationMethod.Mean);
if (aggTest !== 2.5) {
  // An error thrown at the top level of the script aborts it before any file can be processed
  throw new Error(`Expected trp.aggregate([1, 2, 3, 4], "MEAN") to return 2.5: Got ${aggTest}`);
}

/**
 * Parse a raw Textract response JSON string with TRP and run a small reading-order smoke test
 *
 * Once every step has completed without throwing, the global `window.fileProcessed` flag is set
 * to `true`.
 *
 * @param {string} textractJsonStr Text content of an Amazon Textract API response JSON
 */
function parseDocResult(textractJsonStr) {
  console.log("Loading Amazon Textract response JSON with TRP...");
  const { TextractDocument } = trp;
  const parsedDoc = new TextractDocument(JSON.parse(textractJsonStr));

  const pageCount = parsedDoc.nPages;
  console.log(`Got ${pageCount} pages`);

  console.log("Testing getLineClustersInReadingOrder()");
  const firstPage = parsedDoc.pageNumber(1);
  firstPage.getLineClustersInReadingOrder();

  console.log("Marking tests finished");
  window.fileProcessed = true;
}

/**
 * Extract JSON content when user selects a file, and call `parseDocResult()` with it.
 *
 * The file is read asynchronously: This function returns as soon as the read has been started.
 *
 * @param {HTMLInputElement} fileInput The type="file" input that generated the event
 * @throws {Error} If no file is currently selected on the input
 */
function onFileUpload(fileInput) {
  const file = fileInput.files[0];
  if (!file) {
    throw new Error("No JSON file selected");
  }

  const reader = new FileReader();
  reader.onload = (evt) => {
    parseDocResult(evt.target.result);
  };
  reader.onerror = () => {
    // The handler's argument is only a ProgressEvent (not an Error): The underlying failure is
    // exposed on `reader.error`, so throw that to get a meaningful message in the console.
    throw reader.error || new Error("Failed to read the selected file");
  };
  reader.readAsText(file, "UTF-8");
}


================================================
FILE: src-js/examples/browser-iife/package.json
================================================
{
  "name": "@amazon-textract-response-parser-examples/browser-iife",
  "version": "0.0.1",
  "description": "Example of using TRP.js via global object from HTML <script> tag",
  "main": "main.js",
  "type": "module",
  "scripts": {
    "test": "node test.js"
  },
  "author": "Amazon Rekognition Textract Demos <rekognition-textract-demos@amazon.com>",
  "license": "Apache-2.0",
  "private": true,
  "dependencies": {
    "colors": "^1.4.0",
    "puppeteer": "^22.9.0"
  }
}


================================================
FILE: src-js/examples/browser-iife/test.js
================================================
/**
 * Script to test main.html using headless Chrome browser via Puppeteer
 *
 * This test script opens the HTML file in a headless browser, selects an example Textract response
 * JSON from the test data folder, and then waits for the JS to process the file.
 */
// NodeJS Built-Ins:
import path from "node:path";
import { fileURLToPath } from "node:url";

// External Dependencies:
import colors from "colors";
import puppeteer from "puppeteer";

// __dirname is not defined in ES module mode NodeJS, so derive it from this module's own URL. It's
// needed to locate the sample response JSON in the main package's test data folder:
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Resolve main.html relative to this module's URL, so the result is a properly percent-encoded
// `file:` URL even when the checkout path contains spaces or other special characters:
const HTML_URI = new URL("./main.html", import.meta.url).href;
const RESPONSE_JSON_PATH = path.join(__dirname, "..", "..", "test", "data", "test-response.json");

// Launch outside the `try`, so the `finally` below only runs once there's a browser to close:
const browser = await puppeteer.launch();
try {
  // Configure event listeners to forward console msgs + throw errors:
  const page = await browser.newPage();
  page
    .on("console", (message) =>
      console.log(`[${message.type().substring(0, 3).toUpperCase()}] ${message.text()}`),
    )
    .on("pageerror", (err) => {
      console.error("Browser page error");
      throw err;
    })
    .on("requestfailed", (req) => {
      console.error("Browser request failed");
      throw new Error(req.failure().errorText);
    });

  console.log(colors.green("Opening main.html in browser..."));
  await page.goto(HTML_URI);

  console.log(colors.green("Uploading Amazon Textract sample JSON..."));
  const elementHandle = await page.$("input#textractJson");
  if (!elementHandle) {
    // `page.$()` resolves to null when nothing matches: Fail with a clear message in that case
    throw new Error("Could not find the 'input#textractJson' file input in main.html");
  }
  await elementHandle.uploadFile(RESPONSE_JSON_PATH);

  console.log(colors.green("Waiting for sample JS to process the file..."));
  await page.waitForFunction("window.fileProcessed", {
    timeout: 5000, // 5sec
  });
} finally {
  // Always shut the headless browser down, whether or not the steps above succeeded. Any error
  // still propagates afterwards, so a failure marks the test run as failed:
  console.log(colors.green("Closing browser..."));
  await browser.close();
}
console.log(colors.green("Done!"));


================================================
FILE: src-js/examples/nodejs-import/main.js
================================================
/**
 * Example script using TRP.js from NodeJS with ES-style module imports
 *
 * This script shows how you can get started with TRP using either local Amazon Textract JSON
 * response files, or calling synchronous Amazon Textract APIs.
 */
// NodeJS Built-Ins:
import { strict as assert } from "node:assert";
import { mkdir, readFile, writeFile } from "node:fs/promises";

// External Dependencies:
import { TextractClient, AnalyzeDocumentCommand } from "@aws-sdk/client-textract";

// TRP.js:
import { TextractDocument, TextractExpense } from "amazon-textract-response-parser";

// You could also directly `import` static JSON test data files like this:
// (These data file imports will only work in this example project, because the files aren't
// published to NPM)
// import staticTestResponse from "amazon-textract-response-parser/test/data/test-response.json" assert { type: "json" };
// import staticExpResponse from "amazon-textract-response-parser/test/data/invoice-expense-response.json" assert { type: "json" };

// Quick smoke tests with pre-existing data files:
// (Top-level `await` is used here, which requires this file to be run as an ES module - see the
// "type" setting in this example's package.json. Otherwise you may need to wrap the code in an
// `async function`)

// 1) General document analysis response:
const docJsonText = await readFile("../../test/data/test-response.json", "utf-8");
const staticDoc = new TextractDocument(JSON.parse(docJsonText));
assert.strictEqual(staticDoc.nPages, 1);
const staticFirstPage = staticDoc.pageNumber(1);
assert.strictEqual(staticFirstPage.nTables, 1);
staticFirstPage.getLineClustersInReadingOrder();
staticFirstPage.getLinesByLayoutArea();

// 2) Expense analysis response:
const expenseJsonText = await readFile("../../test/data/invoice-expense-response.json", "utf-8");
const staticExpense = new TextractExpense(JSON.parse(expenseJsonText));
assert.strictEqual(staticExpense.nDocs, 1);
const staticExpenseDocs = Array.from(staticExpense.iterDocs());
assert.strictEqual(staticExpenseDocs[0].nSummaryFields, 31);

// Actually call Amazon Textract and use the results:
const textract = new TextractClient({});
const textractResponse = await textract.send(
  new AnalyzeDocumentCommand({
    Document: {
      Bytes: await readFile("../../test/data/default_document_4.png"),
    },
    FeatureTypes: ["FORMS", "LAYOUT", "TABLES"],
  }),
);
const doc = new TextractDocument(textractResponse);
assert.strictEqual(doc.nPages, 1);
// `Math.abs(null)` evaluates to 0 in JavaScript, so explicitly check an orientation was actually
// returned before comparing it - otherwise a `null` result would silently pass the assertion:
const angle = doc.pageNumber(1).getModalWordOrientationDegrees();
assert.notStrictEqual(angle, null);
assert.strictEqual(Math.abs(angle), 0);

// Render the test doc to an HTML file:
// eslint-disable-next-line @typescript-eslint/no-empty-function
await mkdir("./data-tmp", { recursive: true }).catch((_) => {});
await writeFile("./data-tmp/doc.html", doc.html(), "utf-8");

console.log("Done!");


================================================
FILE: src-js/examples/nodejs-import/package.json
================================================
{
  "name": "@amazon-textract-response-parser-examples/nodejs-import",
  "version": "0.0.1",
  "description": "Example of using TRP.js from NodeJS with ES-style 'import' statements",
  "main": "main.js",
  "type": "module",
  "scripts": {
    "test": "node --experimental-specifier-resolution=node main.js"
  },
  "author": "Amazon Rekognition Textract Demos <rekognition-textract-demos@amazon.com>",
  "license": "Apache-2.0",
  "private": true,
  "dependencies": {
    "@aws-sdk/client-textract": "^3.499.0",
    "amazon-textract-response-parser": "file:../.."
  },
  "engines": {
    "node": ">=18"
  }
}


================================================
FILE: src-js/examples/nodejs-require/main.js
================================================
/**
 * Example script using TRP.js from NodeJS with CommonJS-style module `require()`s
 *
 * This script shows how you can get started with TRP using either local Amazon Textract JSON
 * response files, or calling synchronous Amazon Textract APIs.
 */
// NodeJS Built-Ins:
const assert = require("node:assert").strict;
const { mkdir, readFile, writeFile } = require("node:fs/promises");

// External Dependencies:
const { TextractClient, AnalyzeDocumentCommand } = require("@aws-sdk/client-textract");

// TRP.js:
const { TextractDocument, TextractExpense } = require("amazon-textract-response-parser");

// You could also directly `require` static JSON test data files like this:
// (These data file imports will only work in this example project, because the files aren't
// published to NPM)
// const testResponse = require("amazon-textract-response-parser/test/data/test-response.json");
// const testExpenseResponse = require("amazon-textract-response-parser/test/data/invoice-expense-response.json");

/**
 * Smoke-test TRP against the saved Textract response JSONs in the library's test data folder
 */
async function testStaticFiles() {
  // 1) General document analysis response:
  const docJsonText = await readFile("../../test/data/test-response.json", "utf-8");
  const parsedDoc = new TextractDocument(JSON.parse(docJsonText));
  assert.strictEqual(parsedDoc.nPages, 1);
  const firstPage = parsedDoc.pageNumber(1);
  assert.strictEqual(firstPage.nTables, 1);
  firstPage.getLineClustersInReadingOrder();
  firstPage.getLinesByLayoutArea();

  // 2) Expense analysis response:
  const expenseJsonText = await readFile("../../test/data/invoice-expense-response.json", "utf-8");
  const parsedExpense = new TextractExpense(JSON.parse(expenseJsonText));
  assert.strictEqual(parsedExpense.nDocs, 1);
  const expenseDocs = Array.from(parsedExpense.iterDocs());
  assert.strictEqual(expenseDocs[0].nSummaryFields, 31);
}

/**
 * Actually call Amazon Textract and use the results
 *
 * Sends a sample image to the synchronous AnalyzeDocument API, checks the parsed result, and
 * renders it to an HTML file under `./data-tmp`.
 */
async function testCallTextract() {
  const textract = new TextractClient({});
  const textractResponse = await textract.send(
    new AnalyzeDocumentCommand({
      Document: {
        Bytes: await readFile("../../test/data/default_document_4.png"),
      },
      FeatureTypes: ["FORMS", "LAYOUT", "TABLES"],
    }),
  );
  const doc = new TextractDocument(textractResponse);
  assert.strictEqual(doc.nPages, 1);
  // `Math.abs(null)` evaluates to 0 in JavaScript, so explicitly check an orientation was actually
  // returned before comparing it - otherwise a `null` result would silently pass the assertion:
  const angle = doc.pageNumber(1).getModalWordOrientationDegrees();
  assert.notStrictEqual(angle, null);
  assert.strictEqual(Math.abs(angle), 0);

  // Render the test doc to an HTML file:
  // eslint-disable-next-line @typescript-eslint/no-empty-function
  await mkdir("./data-tmp", { recursive: true }).catch((_) => {});
  await writeFile("./data-tmp/doc.html", doc.html(), "utf-8");
}

// Run both test stages in sequence. Failures are handled explicitly so the process always reports
// a non-zero exit code, rather than relying on the runtime's unhandled-rejection behaviour:
testStaticFiles()
  .then(() => testCallTextract())
  .then(() => console.log("Done!"))
  .catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });


================================================
FILE: src-js/examples/nodejs-require/package.json
================================================
{
  "name": "@amazon-textract-response-parser-examples/nodejs-require",
  "version": "0.0.1",
  "description": "Example of using TRP.js from NodeJS with CommonJS-style 'require' statements",
  "main": "main.js",
  "scripts": {
    "test": "node main.js"
  },
  "author": "Amazon Rekognition Textract Demos <rekognition-textract-demos@amazon.com>",
  "license": "Apache-2.0",
  "private": true,
  "dependencies": {
    "@aws-sdk/client-textract": "^3.499.0",
    "amazon-textract-response-parser": "file:../.."
  }
}


================================================
FILE: src-js/examples/nodejs-typescript/package.json
================================================
{
  "name": "@amazon-textract-response-parser-examples/nodejs-typescript",
  "version": "0.0.1",
  "description": "Example of using TRP.js from NodeJS with TypeScript",
  "main": "dist/main.js",
  "scripts": {
    "build": "tsc",
    "clean": "rimraf dist",
    "start": "node dist/main.js",
    "test": "npm run clean && npm run build && npm run start"
  },
  "author": "Amazon Rekognition Textract Demos <rekognition-textract-demos@amazon.com>",
  "license": "Apache-2.0",
  "private": true,
  "dependencies": {
    "@aws-sdk/client-textract": "^3.499.0",
    "amazon-textract-response-parser": "file:../.."
  },
  "devDependencies": {
    "rimraf": "^5.0.5",
    "typescript": "^4.9.5"
  }
}


================================================
FILE: src-js/examples/nodejs-typescript/src/main.ts
================================================
/**
 * Example script using TRP.js from NodeJS with ES-style module imports in TypeScript
 *
 * This script shows how you can get started with TRP using either local Amazon Textract JSON
 * response files, or calling synchronous Amazon Textract APIs.
 */
// NodeJS Built-Ins:
import { strict as assert } from "node:assert";
import { mkdir, readFile, writeFile } from "node:fs/promises";

// External Dependencies:
import { TextractClient, AnalyzeDocumentCommand } from "@aws-sdk/client-textract";

// TRP.js:
import {
  ApiAnalyzeDocumentResponse,
  ApiAnalyzeExpenseResponse,
  TextractDocument,
  TextractExpense,
} from "amazon-textract-response-parser";

// You could also directly `import` static JSON test data files like this:
// (These data file imports will only work in this example project, because the files aren't
// published to NPM)
// eslint-disable-next-line @typescript-eslint/no-var-requires
// const testResponse: ApiAnalyzeDocumentResponse = require("amazon-textract-response-parser/test/data/test-response.json");
// eslint-disable-next-line @typescript-eslint/no-var-requires
// const testExpenseResponse: ApiAnalyzeExpenseResponse = require("amazon-textract-response-parser/test/data/invoice-expense-response.json");

/**
 * Smoke-test TRP against the saved Textract response JSONs in the library's test data folder
 */
async function testStaticFiles() {
  // 1) General document analysis response:
  const docJsonText = await readFile("../../test/data/test-response.json", "utf-8");
  const docResponse: ApiAnalyzeDocumentResponse = JSON.parse(docJsonText);
  const parsedDoc = new TextractDocument(docResponse);
  assert.strictEqual(parsedDoc.nPages, 1);
  const firstPage = parsedDoc.pageNumber(1);
  assert.strictEqual(firstPage.nTables, 1);
  firstPage.getLineClustersInReadingOrder();
  firstPage.getLinesByLayoutArea();

  // 2) Expense analysis response:
  const expenseJsonText = await readFile("../../test/data/invoice-expense-response.json", "utf-8");
  const expenseResponse: ApiAnalyzeExpenseResponse = JSON.parse(expenseJsonText);
  const parsedExpense = new TextractExpense(expenseResponse);
  assert.strictEqual(parsedExpense.nDocs, 1);
  const expenseDocs = Array.from(parsedExpense.iterDocs());
  assert.strictEqual(expenseDocs[0].nSummaryFields, 31);
}

/**
 * Send a sample image to the live Amazon Textract API and exercise TRP on the response
 *
 * The parsed document is also rendered to an HTML file under `./data-tmp`.
 */
async function testCallTextract() {
  const client = new TextractClient({});
  const imageBytes = await readFile("../../test/data/default_document_4.png");
  const apiResponse = await client.send(
    new AnalyzeDocumentCommand({
      Document: { Bytes: imageBytes },
      FeatureTypes: ["FORMS", "LAYOUT", "TABLES"],
    }),
  );

  const parsedDoc = new TextractDocument(apiResponse as ApiAnalyzeDocumentResponse);
  assert.strictEqual(parsedDoc.nPages, 1);

  // The orientation may be null, so rule that out before checking the magnitude:
  const orientation = parsedDoc.pageNumber(1).getModalWordOrientationDegrees();
  assert.notStrictEqual(orientation, null);
  assert.strictEqual(Math.abs(orientation as number), 0);

  // Write an HTML rendering of the document to the temporary output folder:
  // eslint-disable-next-line @typescript-eslint/no-empty-function
  await mkdir("./data-tmp", { recursive: true }).catch((_) => {});
  await writeFile("./data-tmp/doc.html", parsedDoc.html(), "utf-8");
}

// Run both test stages in sequence. Failures are handled explicitly so the process always reports
// a non-zero exit code, rather than relying on the runtime's unhandled-rejection behaviour:
testStaticFiles()
  .then(() => testCallTextract())
  .then(() => console.log("Done!"))
  .catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });


================================================
FILE: src-js/examples/nodejs-typescript/tsconfig.json
================================================
{
  "compilerOptions": {
    "esModuleInterop": true,
    "forceConsistentCasingInFileNames": true,
    "lib": ["es2016"],
    "module": "CommonJS",
    "moduleResolution": "node",
    "noFallthroughCasesInSwitch": true,
    "noImplicitOverride": true,
    "outDir": "dist",
    "removeComments": true,
    "sourceMap": true,
    "strict": true,
    "target": "ES6"
  },
  "exclude": ["dist/", "node_modules/", "**/*.spec.*"],
  "include": ["src/"]
}


================================================
FILE: src-js/jest.config.js
================================================
// Jest configuration for this package's own test suite
module.exports = {
  // Use the ts-jest preset so TypeScript files can be run by Jest
  preset: "ts-jest",
  testEnvironment: "node",
  // `examples` packages define their own test commands
  testPathIgnorePatterns: ["/examples/", "/node_modules/"]
};


================================================
FILE: src-js/package.json
================================================
{
  "name": "amazon-textract-response-parser",
  "version": "0.4.3",
  "description": "Parse API responses from Amazon Textract with higher-level helpers",
  "keywords": [
    "aws",
    "amazon-textract",
    "textract"
  ],
  "main": "dist/cjs/index.js",
  "module": "dist/es/index.js",
  "types": "dist/types/index.d.ts",
  "jsdelivr": "dist/browser/trp.min.js",
  "unpkg": "dist/browser/trp.min.js",
  "directories": {
    "lib": "lib"
  },
  "files": [
    "/dist"
  ],
  "scripts": {
    "audit:examples": "(set -e && for ex in examples/*; do if [ $ex == 'examples/README.md' ]; then continue; fi; (cd $ex && npm audit fix); done)",
    "build:browser": "rollup -c",
    "build:cjs": "tsc -p tsconfig.cjs.json",
    "build:es": "tsc -p tsconfig.es.json",
    "//": "# TODO: Remove below `replace` fix when https://github.com/microsoft/TypeScript/issues/54879 resolved",
    "build:types": "tsc -p tsconfig.types.json && replace 'readonly text: string;' 'get text(): string;' ./dist/types -r",
    "build": "concurrently -n 'iife,cjs,es,types' 'npm run build:browser' 'npm run build:cjs' 'npm run build:es' 'npm run build:types'",
    "clean": "rimraf dist",
    "//": "# TODO: clean:examples command is not yet cross-platform - requires bash-like shell",
    "clean:examples": "(set -e && for ex in examples/*; do if [ $ex == 'examples/README.md' ]; then continue; fi; (cd $ex && rimraf node_modules); done)",
    "prepublishOnly": "npm run clean && npm run build && npm run test:examples",
    "prettify": "prettier --config .prettierrc.js 'src/**/*.{js,ts,tsx}' --write",
    "lint:src": "eslint --quiet --fix 'src/**/*.{js,ts,tsx}'",
    "lint": "eslint --quiet --fix '{src,test}/**/*.{js,ts,tsx}'",
    "reading-order-diagnostic": "npm run build && node bin/reading-order-diagnostic.js",
    "test": "jest --coverage",
    "//": "# TODO: test:examples command is not yet cross-platform - requires bash-like shell",
    "test:examples": "(set -e && for ex in examples/*; do if [ $ex == 'examples/README.md' ]; then continue; fi; (cd $ex && npm install && npm run test); done)",
    "test:unit": "jest --coverage --testPathPattern=test/unit",
    "update:examples": "(set -e && for ex in examples/*; do if [ $ex == 'examples/README.md' ]; then continue; fi; (cd $ex && npm update); done)"
  },
  "repository": {
    "directory": "src-js",
    "type": "git",
    "url": "git+https://github.com/aws-samples/amazon-textract-response-parser.git"
  },
  "author": "Amazon Rekognition Textract Demos <rekognition-textract-demos@amazon.com>",
  "license": "Apache-2.0",
  "bugs": {
    "url": "https://github.com/aws-samples/amazon-textract-response-parser/issues"
  },
  "homepage": "https://github.com/aws-samples/amazon-textract-response-parser#readme",
  "devDependencies": {
    "@aws-sdk/client-textract": "^3.421.0",
    "@rollup/plugin-node-resolve": "^15.0.0",
    "@rollup/plugin-terser": "^0.4.4",
    "@rollup/plugin-typescript": "^11.0.0",
    "@types/jest": "^29.0.0",
    "@typescript-eslint/eslint-plugin": "^6.1.0",
    "@typescript-eslint/parser": "^6.1.0",
    "concurrently": "^8.0.1",
    "eslint": "^8.0.0",
    "eslint-config-prettier": "^9.0.0",
    "eslint-plugin-prettier": "^5.1.3",
    "husky": "^8.0.3",
    "jest": "^29.0.0",
    "lint-staged": "^15.2.10",
    "prettier": "^3.0.3",
    "replace": "^1.2.2",
    "rimraf": "^5.0.0||^6.0.0",
    "rollup": "^3.29.5",
    "ts-jest": "^29.0.0",
    "typescript": "^5.3.3"
  },
  "husky": {
    "hooks": {
      "pre-commit": "lint-staged"
    }
  },
  "lint-staged": {
    "*.{js,ts,tsx}": [
      "eslint --fix"
    ]
  }
}


================================================
FILE: src-js/rollup.config.mjs
================================================
// External Dependencies:
import resolve from "@rollup/plugin-node-resolve";
import typescript from "@rollup/plugin-typescript";
import terser from "@rollup/plugin-terser";

// Rollup build configuration for the browser (IIFE) bundle of the library
export default [
  {
    input: "src/index.ts",
    output: {
      // Name of the global variable that the IIFE bundle assigns the library's exports to
      name: "trp",
      file: "dist/browser/trp.min.js",
      format: "iife",
      sourcemap: true,
      plugins: [
        // Minify the output, but keep class and function names un-mangled
        terser({
          keep_classnames: true,
          keep_fnames: true,
        }),
      ],
    },
    plugins: [
      resolve({ extensions: [".js", ".json", ".ts"] }),
      // The browser build is compiled with its own dedicated TypeScript config file
      typescript({ tsconfig: "tsconfig.browser.json" }),
    ],
  },
];


================================================
FILE: src-js/src/api-models/base.ts
================================================
/**
 * Common Textract API models used by (multiple features in) the Textract Response Parser.
 *
 * This file collects types/interfaces common across multiple API sections.
 */
// Local Dependencies:
import { ApiGeometry } from "./geometry";

/**
 * Enumeration of all inter-Block Relationship types defined by Textract
 *
 * Each member's value is the string used for that relationship in the `Type` field of an
 * `ApiRelationship`.
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/API_Relationship.html
 */
export const enum ApiRelationshipType {
  /**
   * Used to link QUERY blocks to QUERY_RESULTs
   */
  Answer = "ANSWER",
  /**
   * Used in a range of block types to link content within the block (e.g. LINE->WORD)
   */
  Child = "CHILD",
  /**
   * TODO: Usage of this type is not clear at time of writing
   */
  ComplexFeatures = "COMPLEX_FEATURES",
  /**
   * Used to link from a TABLE to its MERGED_CELL children
   *
   * (CHILD is used for the table's standard/un-merged CELL blocks, and from MERGED_CELL->CELL)
   */
  MergedCell = "MERGED_CELL",
  /**
   * Used to link from a TABLE to its associated TABLE_FOOTER, if present
   */
  TableFooter = "TABLE_FOOTER",
  /**
   * Used to link from a TABLE to its associated TABLE_TITLE, if present
   */
  TableTitle = "TABLE_TITLE",
  /**
   * Used to link from a forms/K-V KEY block to its associated VALUE block
   */
  Value = "VALUE",
}

/**
 * Internal interface for Relationships linking a `Block` to a list of (one or more) others
 *
 * Not exported: The concrete per-type interfaces below each narrow `Type` to a single
 * `ApiRelationshipType` member, and are combined into the exported `ApiRelationship` union.
 *
 * See `ApiRelationship` and: https://docs.aws.amazon.com/textract/latest/dg/API_Relationship.html
 */
interface IRelationshipBase {
  /**
   * Unique IDs of target API `Block`s linked by the relationship
   */
  Ids: string[];
  /**
   * Semantic type of this relationship
   */
  Type: ApiRelationshipType;
}

/**
 * Relationship of type ANSWER: Links a QUERY block to its QUERY_RESULTs
 */
export interface ApiAnswerRelationship extends IRelationshipBase {
  Type: ApiRelationshipType.Answer;
}

/**
 * Relationship of type CHILD: Links content within a block (e.g. LINE->WORD)
 */
export interface ApiChildRelationship extends IRelationshipBase {
  Type: ApiRelationshipType.Child;
}

/**
 * Relationship of type COMPLEX_FEATURES
 *
 * TODO: Usage of this type is not clear at time of writing
 */
export interface ApiComplexFeaturesRelationship extends IRelationshipBase {
  Type: ApiRelationshipType.ComplexFeatures;
}

/**
 * Relationship of type MERGED_CELL: Links a TABLE to its MERGED_CELL children
 */
export interface ApiMergedCellRelationship extends IRelationshipBase {
  Type: ApiRelationshipType.MergedCell;
}

/**
 * Relationship of type TABLE_FOOTER: Links a TABLE to its associated TABLE_FOOTER, if present
 */
export interface ApiTableFooterRelationship extends IRelationshipBase {
  Type: ApiRelationshipType.TableFooter;
}

/**
 * Relationship of type TABLE_TITLE: Links a TABLE to its associated TABLE_TITLE, if present
 */
export interface ApiTableTitleRelationship extends IRelationshipBase {
  Type: ApiRelationshipType.TableTitle;
}

/**
 * Relationship of type VALUE: Links a forms/K-V KEY block to its associated VALUE block
 */
export interface ApiValueRelationship extends IRelationshipBase {
  Type: ApiRelationshipType.Value;
}

/**
 * Relationship linking a `Block` to a list of (one or more) others
 *
 * This is a union of one interface per relationship type, each of which fixes the `Type` field to
 * a single `ApiRelationshipType` member - so `Type` can be checked to tell the members apart.
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/API_Relationship.html
 */
export type ApiRelationship =
  | ApiAnswerRelationship
  | ApiChildRelationship
  | ApiComplexFeaturesRelationship
  | ApiMergedCellRelationship
  | ApiTableFooterRelationship
  | ApiTableTitleRelationship
  | ApiValueRelationship;

/**
 * Enumeration of all types of `Block` (content item) detected by Amazon Textract
 *
 * Each member's value is the string used for that type in the `BlockType` field of a `Block`.
 * The members prefixed `Layout` are also grouped together by `ApiLayoutBlockType` and
 * `LAYOUT_BLOCK_TYPES`.
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/API_Block.html
 */
export const enum ApiBlockType {
  /**
   * Plain table cell ignoring any merges (see MERGED_CELL instead)
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-tables.html
   */
  Cell = "CELL",
  /**
   * Key element for a Form Data Key-Value pair (alternative style seen in some test cases)
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-kvp.html
   */
  Key = "KEY",
  /**
   * Compound block for Form Key-Value pairs - more recently replaced by KEY and VALUE
   *
   * Textract Form data results used to use this type for both the Key and Value of detected K-V
   * pairs, with the `EntityType` field (and relationships) differentiating which was which. More
   * recent responses appear to only use `KEY` and `VALUE` BlockTypes instead.
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-kvp.html
   */
  KeyValueSet = "KEY_VALUE_SET",
  /**
   * Layout analysis result for a diagram / image / figure
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/layoutresponse.html
   */
  LayoutFigure = "LAYOUT_FIGURE",
  /**
   * Layout analysis result segmenting page footer from other content
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/layoutresponse.html
   */
  LayoutFooter = "LAYOUT_FOOTER",
  /**
   * Layout analysis result segmenting page header from other content
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/layoutresponse.html
   */
  LayoutHeader = "LAYOUT_HEADER",
  /**
   * Layout analysis result for a Forms Key-Value pair
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/layoutresponse.html
   */
  LayoutKeyValue = "LAYOUT_KEY_VALUE",
  /**
   * Layout analysis result for a list (e.g. bullet points or numbered paragraphs)
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/layoutresponse.html
   */
  LayoutList = "LAYOUT_LIST",
  /**
   * Layout analysis result for a page number annotation
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/layoutresponse.html
   */
  LayoutPageNumber = "LAYOUT_PAGE_NUMBER",
  /**
   * Layout analysis result for a section-level heading
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/layoutresponse.html
   */
  LayoutSectionHeader = "LAYOUT_SECTION_HEADER",
  /**
   * Layout analysis result for a table
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/layoutresponse.html
   */
  LayoutTable = "LAYOUT_TABLE",
  /**
   * Layout analysis result for a paragraph of text
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/layoutresponse.html
   */
  LayoutText = "LAYOUT_TEXT",
  /**
   * Layout analysis result for the main title of the document
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/layoutresponse.html
   */
  LayoutTitle = "LAYOUT_TITLE",
  /**
   * A contiguous string of non-breaking-whitespace separated `WORD`s
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-lines-words.html
   */
  Line = "LINE",
  /**
   * Merged cell spanning multiple underlying rows or columns of a TABLE
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-tables.html
   */
  MergedCell = "MERGED_CELL",
  /**
   * Top-level container for an individual page (also present for single-image/-page requests)
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-pages.html
   */
  Page = "PAGE",
  /**
   * Page-level instance of an input "Query" asked of the document
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/queryresponse.html
   */
  Query = "QUERY",
  /**
   * (Potentially one of multiple) results returned for an input QUERY
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/queryresponse.html
   */
  QueryResult = "QUERY_RESULT",
  /**
   * A boolean selection element such as a checkbox, radio button, circled word, etc
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-selectables.html
   */
  SelectionElement = "SELECTION_ELEMENT",
  /**
   * A detected signature (possibly in a key-value pair or table cell)
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-analyzing.html
   */
  Signature = "SIGNATURE",
  /**
   * An overall table object
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-tables.html
   */
  Table = "TABLE",
  /**
   * A trailing/footer caption associated with a TABLE
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-tables.html
   */
  TableFooter = "TABLE_FOOTER",
  /**
   * A leading/header caption associated with a TABLE
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-tables.html
   */
  TableTitle = "TABLE_TITLE",
  /**
   * Value element for a Form Data Key-Value pair (alternative style seen in some test cases)
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-kvp.html
   */
  Value = "VALUE",
  /**
   * An individual "word" of text (string of characters not separated by whitespace)
   *
   * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-lines-words.html
   */
  Word = "WORD",
}

/**
 * Subset type of the ApiBlockType enumeration that corresponds to LAYOUT_* blocks
 *
 * This compile-time union lists the same members as the runtime `LAYOUT_BLOCK_TYPES` set, so the
 * two need to be kept in sync if a new layout type is added.
 *
 * TODO: Should/can all the Layout-specific stuff in here be moved out of `base.ts`?
 */
export type ApiLayoutBlockType =
  | ApiBlockType.LayoutFigure
  | ApiBlockType.LayoutFooter
  | ApiBlockType.LayoutHeader
  | ApiBlockType.LayoutKeyValue
  | ApiBlockType.LayoutList
  | ApiBlockType.LayoutPageNumber
  | ApiBlockType.LayoutSectionHeader
  | ApiBlockType.LayoutTable
  | ApiBlockType.LayoutText
  | ApiBlockType.LayoutTitle;

/**
 * Runtime lookup set of every API block type that represents a Layout* element
 *
 * Lists the same members as the `ApiLayoutBlockType` union type.
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/layoutresponse.html
 */
export const LAYOUT_BLOCK_TYPES = new Set<ApiBlockType>([
  ApiBlockType.LayoutFigure,
  ApiBlockType.LayoutFooter,
  ApiBlockType.LayoutHeader,
  ApiBlockType.LayoutKeyValue,
  ApiBlockType.LayoutList,
  ApiBlockType.LayoutPageNumber,
  ApiBlockType.LayoutSectionHeader,
  ApiBlockType.LayoutTable,
  ApiBlockType.LayoutText,
  ApiBlockType.LayoutTitle,
]);

/**
 * Test whether a `BlockType` value from the API is one of the Layout* element types
 *
 * @param blockType The block type to check
 * @returns true if `blockType` is a member of `LAYOUT_BLOCK_TYPES`, otherwise false
 */
export function isLayoutBlockType(blockType: ApiBlockType): boolean {
  const isLayout = LAYOUT_BLOCK_TYPES.has(blockType);
  return isLayout;
}

/**
 * Basic interface common to all types of Textract API `Block` (individual detected items)
 *
 * Interfaces for specific block types extend this one, and may narrow `BlockType` or make the
 * optional fields here required.
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/API_Block.html
 */
export interface ApiBlockBase {
  /**
   * Type of item detected
   */
  BlockType: ApiBlockType;
  /**
   * 0-100 based confidence score the model assigned to this detection
   *
   * (May not be present on all block types, for example QUERY)
   */
  Confidence?: number;
  /**
   * Position of the item on the input image / page
   *
   * (Optional on this base interface)
   */
  Geometry?: ApiGeometry;
  /**
   * Unique ID of this `Block`
   *
   * As mentioned in https://docs.aws.amazon.com/textract/latest/dg/API_Block.html this ID is only
   * technically guaranteed to be unique within the scope of the API request
   */
  readonly Id: string;
  /**
   * Related Blocks
   *
   * (Optional: See `ApiRelationship` for the types of link that may be listed)
   */
  readonly Relationships?: ApiRelationship[];
}


================================================
FILE: src-js/src/api-models/content.ts
================================================
/**
 * Low-level content Textract API models used by the Textract Response Parser.
 *
 * This file collects types/interfaces specific to the low level text/content objects detected by
 * general document analysis, including text words/lines and selection elements.
 *
 * See:
 * - https://docs.aws.amazon.com/textract/latest/dg/how-it-works-lines-words.html
 * - https://docs.aws.amazon.com/textract/latest/dg/how-it-works-selectables.html
 */
// Local Dependencies:
import { ApiBlockBase, ApiBlockType, ApiChildRelationship } from "./base";
import { ApiGeometry } from "./geometry";

/**
 * The text types that can be detected: computer printed text vs handwriting
 */
export const enum ApiTextType {
  Printed = "PRINTED",
  Handwriting = "HANDWRITING",
}

/**
 * A WORD block: one unbroken run of characters containing no whitespace
 *
 * (The run might include e.g. hyphens, underscores, etc)
 */
export interface ApiWordBlock extends ApiBlockBase {
  BlockType: ApiBlockType.Word;
  /**
   * Position of the word on the page (should always be present for WORD blocks)
   */
  Geometry: ApiGeometry;
  /**
   * OCR model's confidence in the extracted text of this word, on a 0-100 scale
   */
  Confidence: number;
  /**
   * Whether the text appears hand-written or computer-generated
   */
  TextType: ApiTextType;
  /**
   * The text the OCR model extracted
   */
  Text: string;
}

/**
 * A LINE block: a contiguous sequence of WORDs separated by whitespace
 */
export interface ApiLineBlock extends ApiBlockBase {
  BlockType: ApiBlockType.Line;
  /**
   * Position of the line on the page (should always be present for LINE blocks)
   */
  Geometry: ApiGeometry;
  /**
   * TODO: Is this confidence of structure or aggregate of child words' OCR?
   */
  Confidence: number;
  /**
   * The whole line's text, already concatenated - no need to loop through the child WORDs
   */
  Text: string;
  /**
   * Links to the individual WORD blocks making up this line
   */
  readonly Relationships: ApiChildRelationship[];
}

/**
 * The two (boolean-like) states that a selection element can be in
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-selectables.html
 */
export const enum ApiSelectionStatus {
  NotSelected = "NOT_SELECTED",
  Selected = "SELECTED",
}

/**
 * A SELECTION_ELEMENT block (e.g. checkbox, radio button, circled option)
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-selectables.html
 */
export interface ApiSelectionElementBlock extends ApiBlockBase {
  BlockType: ApiBlockType.SelectionElement;
  /**
   * Position of the element on the page (should always be present for SELECTION_ELEMENT blocks)
   */
  Geometry: ApiGeometry;
  /**
   * Whether the element is selected/ticked/checked/etc, or not
   */
  SelectionStatus: ApiSelectionStatus;
  /**
   * Model confidence (0-100 based) in detecting this selection element and its status
   */
  Confidence: number;
}

/**
 * A SIGNATURE block, marking a detected signature
 *
 * No OCR result is returned for signatures - only their presence and location
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-analyzing.html
 */
export interface ApiSignatureBlock extends ApiBlockBase {
  BlockType: ApiBlockType.Signature;
  /**
   * Position of the signature on the page (should always be present for SIGNATURE blocks)
   */
  Geometry: ApiGeometry;
  /**
   * Signature detection model's confidence for this result, on a 0-100 scale
   */
  Confidence: number;
  /**
   * SIGNATURE blocks don't seem to specify any related blocks
   */
  Relationships?: Array<never>;
}


================================================
FILE: src-js/src/api-models/document.ts
================================================
/**
 * Document processing Textract API models used by the Textract Response Parser.
 *
 * This file collects types/interfaces specific to Textract's overall "document" APIs (rather than
 * e.g. Expense and Identity, or the individual components like Forms/Queries/Tables)
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-document-layout.html
 */
// Local Dependencies:
import { ApiBlockBase, ApiBlockType, ApiChildRelationship } from "./base";
import { ApiLineBlock, ApiSelectionElementBlock, ApiSignatureBlock, ApiWordBlock } from "./content";
import { ApiKeyBlock, ApiKeyValueSetBlock, ApiValueBlock } from "./form";
import { ApiGeometry } from "./geometry";
import { ApiLayoutBlock } from "./layout";
import { ApiQueryBlock, ApiQueryResultBlock } from "./query";
import {
  ApiCellBlock,
  ApiMergedCellBlock,
  ApiTableBlock,
  ApiTableFooterBlock,
  ApiTableTitleBlock,
} from "./table";

// Temporary re-exports for consistency with old API:
export {
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiRelationshipType,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiAnswerRelationship,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiChildRelationship,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiComplexFeaturesRelationship,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiMergedCellRelationship,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiValueRelationship,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiRelationship,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiBlockType,
} from "./base";
export {
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiTextType,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiWordBlock,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiLineBlock,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiSelectionStatus,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiSelectionElementBlock,
} from "./content";
export {
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiKeyValueEntityType,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiKeyValueSetBlock,
} from "./form";
export {
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiQueryBlock,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiQueryResultBlock,
} from "./query";
export {
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiTableEntityType,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiTableBlock,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiTableCellEntityType,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiCellBlock,
  /**
   * @deprecated Please import direct from top-level TRP.js
   */
  ApiMergedCellBlock,
} from "./table";

/**
 * A PAGE block: one whole page of a (possibly multi-page) document
 */
export interface ApiPageBlock extends ApiBlockBase {
  BlockType: ApiBlockType.Page;
  /**
   * Links to the top-level content contained within this page
   *
   * (Those Blocks may themselves link on to further sub-levels, e.g. from TABLE to CELL)
   */
  readonly Relationships?: ApiChildRelationship[];
  /**
   * Geometry of the page (always present for PAGE blocks)
   */
  Geometry: ApiGeometry;
}

/**
 * Type describing actual 'Block' objects returnable by Textract general document analysis
 *
 * This is a union of the concrete per-BlockType interfaces. Each member fixes its own `BlockType`
 * field, so checking `BlockType` lets TypeScript narrow a value to one specific interface.
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/API_Block.html
 */
export type ApiBlock =
  | ApiCellBlock
  | ApiKeyBlock
  | ApiKeyValueSetBlock
  | ApiLayoutBlock
  | ApiLineBlock
  | ApiMergedCellBlock
  | ApiPageBlock
  | ApiQueryBlock
  | ApiQueryResultBlock
  | ApiSelectionElementBlock
  | ApiSignatureBlock
  | ApiTableBlock
  | ApiTableFooterBlock
  | ApiTableTitleBlock
  | ApiValueBlock
  | ApiWordBlock;


================================================
FILE: src-js/src/api-models/expense.ts
================================================
/**
 * Expense analysis Textract API models used by the Textract Response Parser.
 *
 * This file collects types/interfaces specific to Textract's expense/invoice/receipt analysis APIs
 * as per https://docs.aws.amazon.com/textract/latest/dg/invoices-receipts.html
 */

// Local Dependencies:
import { ApiGeometry } from "./geometry";

/**
 * A detected piece of text within an expense field, with its confidence and (optional) location
 *
 * Used for both the label and the value of an `ApiExpenseField`.
 */
export interface ApiExpenseComponentDetection {
  /**
   * The detected text (may be the empty string "")
   */
  Text: string;
  /**
   * Confidence score for this detection
   */
  Confidence: number;
  /**
   * Geometry may be absent if 'Text' is "".
   */
  Geometry?: ApiGeometry;
}

/**
 * The `Type` attached to an `ApiExpenseField`: a text label plus a confidence score
 */
export interface ApiExpenseFieldType {
  /**
   * Text naming the field type
   */
  Text: string;
  /**
   * Confidence score for this type assignment
   */
  Confidence: number;
}

/**
 * One field in an expense analysis result, used for both summary fields and line item fields
 */
export interface ApiExpenseField {
  /**
   * The type assigned to this field
   */
  Type: ApiExpenseFieldType;
  /**
   * Detection of the field's label, when one is present
   */
  LabelDetection?: ApiExpenseComponentDetection;
  /**
   * Detection of the field's value
   */
  ValueDetection: ApiExpenseComponentDetection;
  /**
   * Page number for this field (NOTE(review): presumably 1-based - confirm against the API docs)
   */
  PageNumber: number;
}

/**
 * A single line item in an expense document, expressed as a list of its fields
 */
export interface ApiExpenseLineItem {
  // The fields making up this line item (same field shape as a document's summary fields)
  LineItemExpenseFields: ApiExpenseField[];
}

/**
 * A group of line items within an expense document
 */
export interface ApiExpenseLineItemGroup {
  /**
   * The line items belonging to this group
   */
  LineItems: ApiExpenseLineItem[];
  /**
   * Index number of this group (NOTE(review): numbering base not visible here - confirm)
   */
  readonly LineItemGroupIndex: number;
}

/**
 * One expense document found in an expense analysis response
 */
export interface ApiExpenseDocument {
  /**
   * Document-level (non line item) fields
   */
  SummaryFields: ApiExpenseField[];
  /**
   * Groups of line items found in the document
   */
  LineItemGroups: ApiExpenseLineItemGroup[];
  /**
   * Index number of this document (NOTE(review): numbering base not visible here - confirm)
   */
  readonly ExpenseIndex: number;
}


================================================
FILE: src-js/src/api-models/form.ts
================================================
/**
 * Form data (key-value pairs) analysis Textract API models used by the Textract Response Parser.
 *
 * This file collects types/interfaces specific to the Form data / Key-Value pairs functionality in
 * general document analysis: https://docs.aws.amazon.com/textract/latest/dg/how-it-works-kvp.html
 */
// Local Dependencies:
import { ApiBlockBase, ApiBlockType, ApiChildRelationship, ApiValueRelationship } from "./base";
import { ApiGeometry } from "./geometry";

/**
 * EntityTypes that form key/value objects can carry
 */
export const enum ApiKeyValueEntityType {
  /**
   * The "data" side of the pair, i.e. its value
   */
  Value = "VALUE",
  /**
   * The "label" side of the pair, i.e. its key
   */
  Key = "KEY",
}

/**
 * Alternative representation of the Key (label) item in a Forms key-value pair
 *
 * Forms results conventionally appear to use KEY_VALUE_SET blocks for both key and value, but
 * distinct 'KEY' and 'VALUE' blocks were observed in some responses.
 */
export interface ApiKeyBlock extends ApiBlockBase {
  BlockType: ApiBlockType.Key;
  /**
   * Kept for compatibility with KEY_VALUE_SET; for this BlockType it always contains KEY
   */
  EntityTypes: ApiKeyValueEntityType.Key[];
  /**
   * Position on the page (Geometry is believed to always be present on this block type)
   */
  Geometry: ApiGeometry;
  /**
   * 0-100 based confidence that this is a K-V key (*separate* from actual text OCR confidence!)
   */
  Confidence: number;
  /**
   * Links to key text (CHILD) blocks and corresponding value/result (VALUE)
   */
  Relationships: (ApiChildRelationship | ApiValueRelationship)[];
}

/**
 * A key item *or* a value item of a key-value pair in a Forms analysis result
 *
 * The `KEY_VALUE_SET` BlockType is usually used for both halves of a K-V result, and it is the
 * `EntityTypes` (and relationship patterns) that tell them apart. An alternative pattern using
 * distinct `KEY` and `VALUE` blocks has been observed in some cases.
 */
export interface ApiKeyValueSetBlock extends ApiBlockBase {
  BlockType: ApiBlockType.KeyValueSet;
  /**
   * Tells whether this block is the KEY or the VALUE of the K-V pair
   */
  EntityTypes: ApiKeyValueEntityType[];
  /**
   * Position on the page (Geometry is believed to always be present on this block type)
   */
  Geometry: ApiGeometry;
  /**
   * 0-100 based confidence of the key-value structure model
   *
   * This scores the key-value relation and is *separate* from text extraction (OCR) confidence.
   */
  Confidence: number;
  /**
   * Links to text (CHILD) blocks and corresponding value/result (VALUE when EntityTypes=KEY)
   *
   * May not be present for VALUE blocks with no text (empty/unfilled form elements)
   */
  Relationships?: (ApiChildRelationship | ApiValueRelationship)[];
}

/**
 * Alternative representation of the Value (data) item in a Forms key-value pair
 *
 * Forms results conventionally appear to use KEY_VALUE_SET blocks for both key and value, but
 * distinct 'KEY' and 'VALUE' blocks were observed in some responses.
 */
export interface ApiValueBlock extends ApiBlockBase {
  BlockType: ApiBlockType.Value;
  /**
   * Kept for compatibility with KEY_VALUE_SET; for this BlockType it always contains VALUE
   */
  EntityTypes: ApiKeyValueEntityType.Value[];
  /**
   * Position on the page (Geometry is believed to always be present on this block type)
   */
  Geometry: ApiGeometry;
  /**
   * 0-100 based confidence of this key-value relation (*separate* from text OCR confidence!)
   */
  Confidence: number;
  /**
   * Links to value text (CHILD) blocks, if present
   *
   * (`Relationships` may be missing when the form element is empty/unfilled)
   */
  Relationships?: Array<ApiChildRelationship>;
}


================================================
FILE: src-js/src/api-models/geometry.ts
================================================
/**
 * Core geometry/shape API models used by the Textract Response Parser.
 */

/**
 * A bounding box aligned to the page axes, expressed in relative coordinates
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/API_BoundingBox.html
 */
export interface ApiBoundingBox {
  /**
   * X coordinate of the box's left side, relative to the input page/image edge, from 0-1
   */
  Left: number;
  /**
   * Y coordinate of the box's top, relative to the top of the input page/image edge, from 0-1
   */
  Top: number;
  /**
   * Box width relative to the input page/image, from 0-1
   */
  Width: number;
  /**
   * Box height relative to the input page/image, from 0-1
   */
  Height: number;
}

/**
 * A single point on the page, expressed in relative coordinates
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/API_Point.html
 */
export interface ApiPoint {
  /**
   * Y coordinate of the point relative to the top of the input page/image edge, from 0-1
   */
  Y: number;
  /**
   * X coordinate of the point relative to the input page/image edge, from 0-1
   */
  X: number;
}

/**
 * Where a detected element is located on the input page/image
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/API_Geometry.html
 */
export interface ApiGeometry {
  /**
   * A fine-grained polygon around the element (usually a 4-point box, but may not be axis-aligned)
   */
  Polygon: ApiPoint[];
  /**
   * The smallest coordinate-aligned box containing the element
   */
  BoundingBox: ApiBoundingBox;
}


================================================
FILE: src-js/src/api-models/id.ts
================================================
/**
 * Identity document analysis Textract API models used by the Textract Response Parser.
 *
 * This file collects types/interfaces specific to Textract's ID analysis APIs
 */

/**
 * The `Type` part of an identity document field
 */
export interface ApiIdentityDocumentFieldType {
  /**
   * The detected text
   */
  Text: string;
  /**
   * 0-100 confidence score of the detected text
   */
  Confidence: number;
  /**
   * Theoretically possible per the API docs but I haven't seen this in practice so far.
   */
  NormalizedValue?: {
    ValueType: string;
    Value: string;
  };
}

/**
 * The `ValueDetection` part of an identity document field
 */
export interface ApiIdentityDocumentFieldValueDetection {
  /**
   * The detected value text
   */
  Text: string;
  /**
   * 0-100 confidence score for the detected value text
   */
  Confidence: number;
  /**
   * Only "DATE" ("yy-MM-ddThh:mm:ss") type is mentioned per the API doc
   *
   * https://docs.aws.amazon.com/textract/latest/dg/API_NormalizedValue.html
   */
  NormalizedValue?: {
    ValueType: string;
    Value: string;
  };
}

/**
 * One field of an identity document: its type paired with the value detected for it
 */
export interface ApiIdentityDocumentField {
  /**
   * The value detected for this field
   */
  ValueDetection: ApiIdentityDocumentFieldValueDetection;
  /**
   * The type of this field
   */
  Type: ApiIdentityDocumentFieldType;
}

/**
 * One identity document found in an identity analysis response
 */
export interface ApiIdentityDocument {
  /**
   * The fields detected on this identity document
   */
  IdentityDocumentFields: ApiIdentityDocumentField[];
  /**
   * Index number of this document (NOTE(review): numbering base not visible here - confirm)
   */
  readonly DocumentIndex: number;
}


================================================
FILE: src-js/src/api-models/index.ts
================================================
/**
 * Amazon Textract API models (TypeScript interfaces) used by the response parser.
 *
 * While these models should correspond fairly closely to those in the actual typings for the
 * @aws-sdk/client-textract module, there may be some cases where we can be more specific - and
 * maintaining our own copies lets us avoid introducing a dependency on the AWS SDK for JS.
 */

export {
  ApiAnswerRelationship,
  ApiBlockBase,
  ApiBlockType,
  ApiChildRelationship,
  ApiComplexFeaturesRelationship,
  ApiMergedCellRelationship,
  ApiRelationship,
  ApiRelationshipType,
  ApiTableFooterRelationship,
  ApiTableTitleRelationship,
  ApiValueRelationship,
  isLayoutBlockType,
} from "./base";
export {
  ApiLineBlock,
  ApiSelectionStatus,
  ApiSelectionElementBlock,
  ApiSignatureBlock,
  ApiTextType,
  ApiWordBlock,
} from "./content";
export { ApiBlock, ApiPageBlock } from "./document";
export {
  ApiExpenseComponentDetection,
  ApiExpenseDocument,
  ApiExpenseField,
  ApiExpenseFieldType,
  ApiExpenseLineItem,
  ApiExpenseLineItemGroup,
} from "./expense";
export { ApiKeyBlock, ApiKeyValueEntityType, ApiKeyValueSetBlock, ApiValueBlock } from "./form";
export { ApiBoundingBox, ApiPoint, ApiGeometry } from "./geometry";
export {
  ApiIdentityDocument,
  ApiIdentityDocumentField,
  ApiIdentityDocumentFieldType,
  ApiIdentityDocumentFieldValueDetection,
} from "./id";
export {
  ApiLayoutBlock,
  ApiLayoutFigureBlock,
  ApiLayoutFooterBlock,
  ApiLayoutHeaderBlock,
  ApiLayoutKeyValueBlock,
  ApiLayoutListBlock,
  ApiLayoutPageNumberBlock,
  ApiLayoutSectionHeaderBlock,
  ApiLayoutTableBlock,
  ApiLayoutTextBlock,
  ApiLayoutTitleBlock,
} from "./layout";
export { ApiQueryBlock, ApiQueryResultBlock } from "./query";
export {
  ApiAnalyzeDocumentResponse,
  ApiAnalyzeExpenseResponse,
  ApiAnalyzeIdResponse,
  ApiAsyncDocumentAnalysis,
  ApiAsyncDocumentTextDetection,
  /**
   * @deprecated Backward compatibility for typo: Please use ApiAsyncJobOutputInProgress
   */
  ApiAsyncJobOuputInProgress,
  ApiAsyncJobOutputFailed,
  ApiAsyncJobOutputInProgress,
  ApiAsyncJobOutputPartialSuccess,
  ApiAsyncJobOuputSucceded,
  ApiDetectDocumentTextResponse,
  ApiDocumentMetadata,
  ApiJobStatus,
  ApiResponsePage,
  ApiResponsePages,
  ApiResponseWithContent,
  ApiResultWarning,
} from "./response";
export {
  ApiCellBlock,
  ApiMergedCellBlock,
  ApiTableBlock,
  ApiTableCellEntityType,
  ApiTableEntityType,
  ApiTableFooterBlock,
  ApiTableTitleBlock,
} from "./table";


================================================
FILE: src-js/src/api-models/layout.ts
================================================
/**
 * Layout analysis Textract API models used by the Textract response parser.
 *
 * This file collects types/interfaces specific to the Layout analysis functionality in general
 * document analysis: https://docs.aws.amazon.com/textract/latest/dg/layoutresponse.html
 */
// Local Dependencies:
import { ApiBlockBase, ApiBlockType, ApiChildRelationship } from "./base";
import { ApiGeometry } from "./geometry";

// (Non-exported base interface shared by the Layout block types, to reduce duplication)
interface ApiLayoutBlockBase extends ApiBlockBase {
  /**
   * Position of the layout object on the page (should always be present for Layout objects)
   */
  Geometry: ApiGeometry;
  /**
   * 0-100 based confidence of the detection of this layout object (*separate* from OCR confidence)
   *
   * Only the Layout model's scoring for identifying this structural element in the document is
   * reflected here - not the OCR of the text/content within.
   */
  Confidence: number;
  /**
   * Content within the layout item, if any is present
   *
   * (Layout objects will nearly always have content, except e.g. LAYOUT_FIGURE)
   */
  Relationships?: Array<ApiChildRelationship>;
}

/**
 * Layout Block representing an image / diagram / figure
 *
 * From example responses, these blocks are unlikely to contain any content.
 */
export interface ApiLayoutFigureBlock extends ApiLayoutBlockBase {
  // Narrows the inherited BlockType to the single LayoutFigure enum member
  BlockType: ApiBlockType.LayoutFigure;
}

/**
 * Layout Block representing an element of the page footer
 *
 * From example responses, there seems to be a separate element for each cluster of text (usually
 * horizontal clusters), each linking to LINE children. LAYOUT_PAGE_NUMBER blocks are peers, not
 * children.
 */
export interface ApiLayoutFooterBlock extends ApiLayoutBlockBase {
  // Narrows the inherited BlockType to the single LayoutFooter enum member
  BlockType: ApiBlockType.LayoutFooter;
}

/**
 * Layout Block representing an element of the page header
 *
 * From example responses, there seems to be a separate element for each cluster of text (usually
 * horizontal clusters), each linking to LINE children. LAYOUT_PAGE_NUMBER blocks are peers, not
 * children.
 */
export interface ApiLayoutHeaderBlock extends ApiLayoutBlockBase {
  // Narrows the inherited BlockType to the single LayoutHeader enum member
  BlockType: ApiBlockType.LayoutHeader;
}

/**
 * Layout Block representing a key-value / form field pair (use FORMS analysis for more detail)
 */
export interface ApiLayoutKeyValueBlock extends ApiLayoutBlockBase {
  // Narrows the inherited BlockType to the single LayoutKeyValue enum member
  BlockType: ApiBlockType.LayoutKeyValue;
}

/**
 * Layout Block representing a list (e.g. bullet points or numbered paragraphs)
 */
export interface ApiLayoutListBlock extends ApiLayoutBlockBase {
  // Narrows the inherited BlockType to the single LayoutList enum member
  BlockType: ApiBlockType.LayoutList;
}

/**
 * Layout Block representing a page number annotation
 *
 * From example responses, these link to (normally exactly one) LINE children
 */
export interface ApiLayoutPageNumberBlock extends ApiLayoutBlockBase {
  // Narrows the inherited BlockType to the single LayoutPageNumber enum member
  BlockType: ApiBlockType.LayoutPageNumber;
}

/**
 * Layout Block representing an individual section heading/title
 */
export interface ApiLayoutSectionHeaderBlock extends ApiLayoutBlockBase {
  // Narrows the inherited BlockType to the single LayoutSectionHeader enum member
  BlockType: ApiBlockType.LayoutSectionHeader;
}

/**
 * Layout Block representing a table (use TABLES analysis for more detailed structure)
 */
export interface ApiLayoutTableBlock extends ApiLayoutBlockBase {
  // Narrows the inherited BlockType to the single LayoutTable enum member
  BlockType: ApiBlockType.LayoutTable;
}

/**
 * Layout Block representing a paragraph of text
 */
export interface ApiLayoutTextBlock extends ApiLayoutBlockBase {
  // Narrows the inherited BlockType to the single LayoutText enum member
  BlockType: ApiBlockType.LayoutText;
}

/**
 * Layout Block representing a top-level document title
 */
export interface ApiLayoutTitleBlock extends ApiLayoutBlockBase {
  // Narrows the inherited BlockType to the single LayoutTitle enum member
  BlockType: ApiBlockType.LayoutTitle;
}

/**
 * All possible Blocks returned by Textract LAYOUT analysis of overall doc/page structure
 *
 * A union of the individual Layout block interfaces, each of which fixes its own `BlockType`, so
 * checking `BlockType` narrows a value to one specific interface.
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/layoutresponse.html
 */
export type ApiLayoutBlock =
  | ApiLayoutFigureBlock
  | ApiLayoutFooterBlock
  | ApiLayoutHeaderBlock
  | ApiLayoutKeyValueBlock
  | ApiLayoutListBlock
  | ApiLayoutPageNumberBlock
  | ApiLayoutSectionHeaderBlock
  | ApiLayoutTableBlock
  | ApiLayoutTextBlock
  | ApiLayoutTitleBlock;


================================================
FILE: src-js/src/api-models/query.ts
================================================
/**
 * Queries analysis Textract API models used by the Textract Response Parser.
 *
 * This file collects types/interfaces specific to the Amazon Textract Queries functionality:
 * https://docs.aws.amazon.com/textract/latest/dg/queryresponse.html
 */
// Local Dependencies:
import { ApiAnswerRelationship, ApiBlockBase, ApiBlockType } from "./base";

/**
 * Page-level instance of the input Query/question submitted for the analysis
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/queryresponse.html
 */
export interface ApiQueryBlock extends ApiBlockBase {
  BlockType: ApiBlockType.Query;
  /**
   * Since QUERY blocks represent input questions, they don't carry confidence scores
   *
   * Declared as *optional* `never`: the property may be omitted but can never hold a value. (A
   * required `never` property would make it impossible to construct a valid QUERY block object.)
   */
  Confidence?: never;
  /**
   * Page number for this query block
   *
   * When a query is applied to multiple pages, it generates several QUERY blocks in the result -
   * each the 'CHILD' of one page and each with a Page number.
   */
  Page?: number;
  /**
   * The Query input to the API
   */
  readonly Query: {
    /**
     * The optional unique alias provided for this query (to simplify retrieval/searching)
     */
    Alias?: string;
    /**
     * The original question/query text
     */
    Text: string;
  };
  /**
   * Links to (potentially multiple) QUERY_RESULT answer objects, if any were found.
   */
  Relationships?: ApiAnswerRelationship[];
}

/**
 * A result/answer that was detected for a Query
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/queryresponse.html
 */
export interface ApiQueryResultBlock extends ApiBlockBase {
  BlockType: ApiBlockType.QueryResult;
  /**
   * The result's text
   */
  Text: string;
  /**
   * Textract model's confidence in this answer to the Query, on a 0-100 scale
   */
  Confidence: number;
  /**
   * Page number for this query result
   *
   * When absent, refer to the parent PAGE of the linked QUERY instead
   */
  Page?: number;
  /**
   * (Legacy) when present, appeared to mirror `Text` - which should be preferred
   *
   * @deprecated Does not seem to be in use in current responses
   */
  SearchKey?: string;
  /**
   * QUERY_RESULT blocks do not seem to link answers through to underlying WORD/LINEs/etc
   */
  Relationships?: Array<never>;
}


================================================
FILE: src-js/src/api-models/response.ts
================================================
/**
 * Top-level API response models used by the Textract Response Parser.
 *
 * These models/interfaces cover the top-level response structures as returned by Textract APIs
 */

// Local Dependencies:
import { ApiExpenseDocument } from "./expense";
import { ApiIdentityDocument } from "./id";
import { ApiBlock } from "./document";

/**
 * The status values that async API jobs can report
 */
export const enum ApiJobStatus {
  InProgress = "IN_PROGRESS",
  Succeeded = "SUCCEEDED",
  PartialSuccess = "PARTIAL_SUCCESS",
  Failed = "FAILED",
}

/**
 * Information about an input document
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/API_DocumentMetadata.html
 */
export interface ApiDocumentMetadata {
  // Page count reported for the document (see the API_DocumentMetadata reference linked above)
  Pages: number;
}

/**
 * The result payload of an Amazon Textract expense analysis
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/API_AnalyzeExpense.html
 *
 * (Results from the async GetExpenseAnalysis have this same structure, with extra fields added)
 */
export interface ApiAnalyzeExpenseResponse {
  /**
   * The expense documents that were found
   */
  ExpenseDocuments: ApiExpenseDocument[];
  /**
   * Information about the input document
   */
  DocumentMetadata: ApiDocumentMetadata;
}

/**
 * The result payload of an Amazon Textract identity document analysis
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/API_AnalyzeID.html
 */
export interface ApiAnalyzeIdResponse {
  /**
   * The identity documents that were found
   */
  IdentityDocuments: ApiIdentityDocument[];
  /**
   * Information about the input document
   */
  DocumentMetadata: ApiDocumentMetadata;
  /**
   * Model version string reported by the API
   */
  readonly AnalyzeIDModelVersion: string;
}

/**
 * Fields common to the response types here that carry detected `Blocks` content
 *
 * Extended by the sync document responses and by the completed async job outputs.
 */
export interface ApiResponseWithContent {
  /**
   * Information about the input document
   */
  DocumentMetadata: ApiDocumentMetadata;
  /**
   * The Blocks detected in the document
   */
  Blocks: ApiBlock[];
}

/**
 * The result payload of an Amazon Textract general document analysis
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/API_GetDocumentAnalysis.html
 */
export interface ApiAnalyzeDocumentResponse extends ApiResponseWithContent {
  /**
   * Human loop output, when it is included in the response
   */
  HumanLoopActivationOutput?: {
    HumanLoopArn: string;
    HumanLoopActivationReasons: string[];
    HumanLoopActivationConditionsEvaluationResults: string;
  };
  /**
   * Model version string reported by the API
   */
  AnalyzeDocumentModelVersion: string;
}

/**
 * Result data from an Amazon Textract text detection job (OCR only, no analysis)
 *
 * Adds the model version field on top of the `Blocks` and `DocumentMetadata` inherited from
 * `ApiResponseWithContent`.
 *
 * See: https://docs.aws.amazon.com/textract/latest/dg/API_GetDocumentTextDetection.html
 */
export interface ApiDetectDocumentTextResponse extends ApiResponseWithContent {
  // Model version string reported by the API
  DetectDocumentTextModelVersion: string;
}

/**
 * Output of an asynchronous job that is still running (no document content available yet)
 */
export interface ApiAsyncJobOutputInProgress {
  JobStatus: "IN_PROGRESS";
  /**
   * Optional status message, which may be reported while the job is not completed
   */
  StatusMessage?: string;
  /**
   * Warnings reported for the job so far, if any
   *
   * Typed as a variable-length array (matching the `Warnings` field of the other job outputs)
   * rather than a single-element tuple, so that zero or several warnings type-check correctly.
   */
  Warnings?: ApiResultWarning[];
}

/**
 * Misspelled ("Ouput") legacy name for `ApiAsyncJobOutputInProgress`, adding no members of its own
 *
 * @deprecated Backward compatibility for typo: Please use ApiAsyncJobOutputInProgress
 */
export interface ApiAsyncJobOuputInProgress extends ApiAsyncJobOutputInProgress {}

/**
 * Status-related fields that the Get*Analysis APIs report for asynchronous jobs
 */
interface ApiAsyncJobOutputStatus {
  JobStatus: "FAILED" | "IN_PROGRESS" | "PARTIAL_SUCCESS" | "SUCCEEDED";
  /**
   * Optional status message for the job
   */
  StatusMessage?: string;
  /**
   * Warnings reported for the job, if any
   */
  Warnings?: ApiResultWarning[];
  /**
   * When present, a continuation token to fetch the next section of the response
   *
   * This field can in some cases be present but set to null, as raised in
   * https://github.com/aws-samples/amazon-textract-response-parser/issues/154
   */
  NextToken?: null | string;
}

/**
 * One entry of the `Warnings` list reported on an asynchronous job's output
 */
export interface ApiResultWarning {
  /**
   * Pages listed for this warning (NOTE(review): presumably the affected page numbers - confirm)
   */
  Pages: number[];
  /**
   * Code identifying the warning
   */
  ErrorCode: string;
}

/**
 * Output of an asynchronous job that completed successfully: document content plus job status
 *
 * (The spelling of this exported name is kept as-is for compatibility with existing importers)
 */
export interface ApiAsyncJobOuputSucceded extends ApiResponseWithContent, ApiAsyncJobOutputStatus {
  // Narrows the inherited JobStatus union to the single literal for this outcome
  JobStatus: "SUCCEEDED";
}

/**
 * Output of an asynchronous job that partially succeeded: document content plus job status
 */
export interface ApiAsyncJobOutputPartialSuccess extends ApiResponseWithContent, ApiAsyncJobOutputStatus {
  // Narrows the inherited JobStatus union to the single literal for this outcome
  JobStatus: "PARTIAL_SUCCESS";
}

/**
 * Output of an asynchronous job that failed: job status fields only, with no document content
 */
export interface ApiAsyncJobOutputFailed extends ApiAsyncJobOutputStatus {
  // Narrows the inherited JobStatus union to the single literal for this outcome
  JobStatus: "FAILED";
}

/**
 * Possible shapes of an asynchronous document analysis job result
 *
 * Either the job is still in progress, or it has finished (failed / partial success / succeeded),
 * in which case `AnalyzeDocumentModelVersion` is also part of the type.
 */
export type ApiAsyncDocumentAnalysis =
  | ApiAsyncJobOutputInProgress
  | ({ AnalyzeDocumentModelVersion: string } & (
      | ApiAsyncJobOutputFailed
      | ApiAsyncJobOutputPartialSuccess
      | ApiAsyncJobOuputSucceded
    ));

/**
 * Possible shapes of an asynchronous document text detection job result
 *
 * Either the job is still in progress, or it has finished (failed / partial success / succeeded),
 * in which case `DetectDocumentTextModelVersion` is also part of the type.
 */
export type ApiAsyncDocumentTextDetection =
  | ApiAsyncJobOutputInProgress
  | ({ DetectDocumentTextModelVersion: string } & (
      | ApiAsyncJobOutputFailed
      | ApiAsyncJobOutputPartialSuccess
      | ApiAsyncJobOuputSucceded
    ));

/**
 * Any single document analysis or text detection response object, whether sync or async
 */
export type ApiResponsePage =
  | ApiAnalyzeDocumentResponse
  | ApiAsyncDocumentAnalysis
  | ApiAsyncDocumentTextDetection
  | ApiDetectDocumentTextResponse;

/**
 * A list of response objects that are all of one response type
 *
 * (A union of homogeneous arrays - not an array whose elements may mix response types)
 */
export type ApiResponsePages =
  | Array<ApiAnalyzeDocumentResponse>
  | Array<ApiAsyncDocumentAnalysis>
  | Array<ApiAsyncDocumentTextDetection>
  | Array<ApiDetectDocumentTextResponse>;


================================================
FILE: src-js/src/api-models/table.ts
================================================
/**
 * Table analysis Textract API models used by the Textract Response Parser.
 *
 * This file collects types/interfaces specific to the tables structure analysis functionality in
 * general document analysis:
 * https://docs.aws.amazon.com/textract/latest/dg/how-it-works-tables.html
 */
// Local Dependencies:
import {
  ApiBlockBase,
  ApiBlockType,
  ApiChildRelationship,
  ApiMergedCellRelationship,
  ApiTableFooterRelationship,
  ApiTableTitleRelationship,
} from "./base";
import { ApiGeometry } from "./geometry";

/**
 * EntityTypes that top-level TABLE objects can carry
 *
 * TODO: Haven't seen a clear definition yet of the delineation between these
 */
export const enum ApiTableEntityType {
  SemiStructuredTable = "SEMI_STRUCTURED_TABLE",
  StructuredTable = "STRUCTURED_TABLE",
}

/**
 * A TABLE block: an overall table whose structure has been extracted
 */
export interface ApiTableBlock extends ApiBlockBase {
  BlockType: ApiBlockType.Table;
  /**
   * Position of the table on the page
   */
  Geometry: ApiGeometry;
  /**
   * If present, defines whether this is a structured or semi-structured table
   */
  EntityTypes?: ApiTableEntityType[];
  /**
   * 0-100 based confidence of the table identification model (separate from OCR content conf)
   */
  Confidence: number;
  /**
   * Links to (merged or underlying) cells, titles, or footers contained within this table
   *
   * Underlying cells are reached via CHILD relationships and merged cells via MERGED_CELL, while
   * TABLE_FOOTER and TABLE_TITLE point to footers and titles respectively.
   */
  Relationships: (
    | ApiChildRelationship
    | ApiMergedCellRelationship
    | ApiTableFooterRelationship
    | ApiTableTitleRelationship
  )[];
}

/**
 * EntityTypes that individual table cells can carry
 */
export const enum ApiTableCellEntityType {
  ColumnHeader = "COLUMN_HEADER",
  Footer = "TABLE_FOOTER",
  SectionTitle = "TABLE_SECTION_TITLE",
  Summary = "TABLE_SUMMARY",
  Title = "TABLE_TITLE",
}

/**
 * Block representing an *underlying* cell before consideration of merged cells
 *
 * CELL blocks ignore any merged cell structure, treating the table as a uniform grid. For a more
 * human-like representation, you probably want to check for any linked MERGED_CELL entries first.
 */
export interface ApiCellBlock extends ApiBlockBase {
  /**
   * Discriminator: always `ApiBlockType.Cell` for this interface
   */
  BlockType: ApiBlockType.Cell;
  /**
   * 1-based index of the table column for this underlying cell
   */
  ColumnIndex: number;
  /**
   * This property is always = 1 as CELL blocks ignore merged cell structure
   */
  ColumnSpan: 1;
  /**
   * 0-100 based confidence of the table structure model (separate from OCR content confidence)
   */
  Confidence: number;
  /**
   * Metadata tagging the type of cell e.g. column header, summary, etc.
   */
  EntityTypes?: ApiTableCellEntityType[];
  Geometry: ApiGeometry; // Should always be present for CELL blocks
  /**
   * Content items (e.g. WORD, SELECTION_ELEMENT) within this underlying cell
   *
   * Optional in this model - presumably omitted for cells with no content (TODO: confirm against
   * real API output).
   */
  Relationships?: ApiChildRelationship[];
  /**
   * 1-based index of the table row for this underlying cell
   */
  RowIndex: number;
  /**
   * This property is always = 1 as CELL blocks ignore merged cell structure
   */
  RowSpan: 1;
}

/**
 * Block representing a merged cell spanning multiple rows/columns of the underlying structure
 */
export interface ApiMergedCellBlock extends ApiBlockBase {
  /**
   * Discriminator: always `ApiBlockType.MergedCell` for this interface
   */
  BlockType: ApiBlockType.MergedCell;
  /**
   * 1-based index of the starting column for this merged cell
   */
  ColumnIndex: number;
  /**
   * Number of underlying table columns this cell covers
   */
  ColumnSpan: number;
  /**
   * 0-100 based confidence of the table structure model (separate from OCR content confidence)
   */
  Confidence: number;
  /**
   * Metadata tagging the type of cell e.g. column header, summary, etc.
   */
  EntityTypes?: ApiTableCellEntityType[];
  Geometry: ApiGeometry; // Should always be present for MERGED_CELL blocks
  /**
   * Underlying CELL Blocks covered by this merged cell.
   */
  Relationships: ApiChildRelationship[];
  /**
   * 1-based index of the starting row for this merged cell
   */
  RowIndex: number;
  /**
   * Number of underlying table rows this cell covers
   */
  RowSpan: number;
}

/**
 * Block representing a trailing/footer caption associated with a table
 */
export interface ApiTableFooterBlock extends ApiBlockBase {
  /**
   * Discriminator: always `ApiBlockType.TableFooter` for this interface
   */
  BlockType: ApiBlockType.TableFooter;
  /**
   * 0-100 based confidence of the table structure model (separate from OCR content confidence)
   */
  Confidence: number;
  Geometry: ApiGeometry; // Should always be present for TABLE_FOOTER blocks
  /**
   * As far as I can tell, TABLE_FOOTER blocks always link directly to WORD children (not LINE)
   *
   * Declared `readonly`: the property itself cannot be reassigned through this interface.
   */
  readonly Relationships: ApiChildRelationship[];
}

/**
 * Block representing a leading/header caption associated with a table
 */
export interface ApiTableTitleBlock extends ApiBlockBase {
  /**
   * Discriminator: always `ApiBlockType.TableTitle` for this interface
   */
  BlockType: ApiBlockType.TableTitle;
  /**
   * 0-100 based confidence of the table structure model (separate from OCR content confidence)
   */
  Confidence: number;
  Geometry: ApiGeometry; // Should always be present for TABLE_TITLE blocks
  /**
   * As far as I can tell, TABLE_TITLE blocks always link directly to WORD children (not LINE)
   *
   * Declared `readonly`: the property itself cannot be reassigned through this interface.
   */
  readonly Relationships: ApiChildRelationship[];
}


================================================
FILE: src-js/src/base.ts
================================================
/**
 * Common shared utilities, interfaces, etc.
 */

// Local Dependencies:
import { ApiBlockType, ApiRelationshipType } from "./api-models/base";
import { ApiBlock } from "./api-models/document";
import { ApiDocumentMetadata } from "./api-models/response";

/**
 * Generic typing for a concrete class constructor to support TypeScript Mixins pattern
 *
 * Matches any `new`-able type that accepts arbitrary arguments and produces a `T`.
 * (We could use `abstract new` instead to also type abstract base classes.)
 *
 * See: https://www.typescriptlang.org/docs/handbook/mixins.html
 */
export type Constructor<T> = new (...args: any[]) => T; // eslint-disable-line @typescript-eslint/no-explicit-any

/**
 * Base class for all classes which wrap over an actual Textract API object.
 *
 * Exposes the underlying object for access as `dict`.
 */
export class ApiObjectWrapper<T> {
  // The wrapped API object, held by reference exactly as passed to the constructor (not copied)
  _dict: T;

  /**
   * @param dict Raw API object to wrap
   */
  constructor(dict: T) {
    this._dict = dict;
  }

  /**
   * Raw underlying Amazon Textract API object that this parsed item wraps
   *
   * This is the same object reference held internally, so mutating it mutates the wrapped data.
   */
  get dict(): T {
    return this._dict;
  }
}

/**
 * Basic properties exposed by all classes which wrap over a Textract API `Block` object.
 */
export interface IApiBlockWrapper<T extends ApiBlock> {
  /**
   * Raw underlying Amazon Textract API `Block` object that this parsed item wraps
   */
  get dict(): T;
  /**
   * Unique ID of the underlying Amazon Textract API `Block` object that this parsed item wraps
   */
  get id(): string;
  /**
   * Type of underlying Amazon Textract API `Block` object that this parsed item wraps
   */
  get blockType(): ApiBlockType;
  /**
   * Dynamic accessor for the unique Block IDs of all CHILD relationships from this Block
   */
  get childBlockIds(): string[];
  /**
   * Fetch the unique Block IDs of this block's `Relationships`, filtered by type(s)
   * @param relType Only keep IDs corresponding to relations of this type (or list of types)
   * @returns IDs of the Blocks targeted by every relationship matching `relType`
   */
  relatedBlockIdsByRelType(relType: ApiRelationshipType | ApiRelationshipType[]): string[];
}

/**
 * Interface for objects that expose their plain text content via a `text` accessor
 */
export interface IWithText {
  /**
   * Return the text content of this element (and any child content)
   *
   * Unlike `.str()`, this includes only the actual text content and no semantic information.
   */
  get text(): string;
}

/**
 * Configurations for filtering rendering (e.g. HTML) by block type
 *
 * This interface is designed for general compatibility with `IBlockTypeFilterOpts`, but where the
 * concept of `onUnexpectedBlockType` may not be applicable.
 */
export interface IRenderOpts {
  /**
   * *Only* render blocks of the given type(s)
   *
   * By default, all blocks are returned unless otherwise documented. If you specify this filter,
   * you probably want to include *at least* ApiBlockType.Line and ApiBlockType.Word!
   *
   * Accepts a single block type, an array, or a Set.
   */
  includeBlockTypes?: ApiBlockType | ApiBlockType[] | Set<ApiBlockType> | null;
  /**
   * Block types to omit from the results
   *
   * Unlike `includeBlockTypes`, this is typed to accept only an array or Set (not a single type).
   */
  skipBlockTypes?: ApiBlockType[] | Set<ApiBlockType> | null;
}

/**
 * Check whether a (possibly un-normalized) block type filter spec permits a given BlockType
 *
 * Handy for callers holding raw options: the whole spec may be null/undefined, and the skip and
 * include specifiers need not have been converted to `Set`s yet.
 *
 * @param filterOpts An (un-normalized) filter specification for methods like listContent
 * @param blockType The block type of interest
 * @returns false if `blockType` is listed in `skipBlockTypes`, or if a (truthy)
 *    `includeBlockTypes` is given and does not list it; otherwise true
 */
export function doesFilterAllowBlockType(
  filterOpts: IRenderOpts | null | undefined,
  blockType: ApiBlockType,
): boolean {
  // No filter spec at all means nothing is restricted:
  if (!filterOpts) return true;

  const { includeBlockTypes, skipBlockTypes } = filterOpts;

  // Skip rules are checked first, so they win over include rules:
  const isSkipped = skipBlockTypes ? normalizeOptionalSet(skipBlockTypes).has(blockType) : false;
  if (isSkipped) return false;

  // A null/absent include spec places no restriction on the block type:
  return includeBlockTypes ? normalizeOptionalSet(includeBlockTypes).has(blockType) : true;
}

/**
 * Interface for objects that can be rendered to HTML and to a descriptive string, in addition to
 * exposing their plain `text` (inherited from `IWithText`)
 */
export interface IRenderable extends IWithText {
  /**
   * Return a best-effort semantic HTML representation of this element and its content
   *
   * @param opts Optional configuration for filtering rendering to certain content types
   */
  html(opts?: IRenderOpts): string;

  // TODO: Add Markdown options in future?

  /**
   * Return a text representation of this element and its content
   *
   * Unlike `.text`, this may include additional characters to try and communicate the type of the
   * element for an overall representation of a page.
   */
  str(): string;
}

/**
 * Base for classes which wrap over a Textract API 'Block' object.
 *
 * Provides the `IApiBlockWrapper` accessors by reading straight from the wrapped block.
 */
export class ApiBlockWrapper<T extends ApiBlock> extends ApiObjectWrapper<T> implements IApiBlockWrapper<T> {
  /**
   * Unique ID of the wrapped API `Block`
   */
  get id(): string {
    return this._dict.Id;
  }

  /**
   * `BlockType` of the wrapped API `Block`
   */
  get blockType(): ApiBlockType {
    return this._dict.BlockType;
  }

  /**
   * Block IDs targeted by the CHILD relationships of the wrapped block
   */
  get childBlockIds(): string[] {
    return this.relatedBlockIdsByRelType(ApiRelationshipType.Child);
  }

  /**
   * Collect the target Block IDs of this block's `Relationships`, filtered by type(s)
   *
   * @param relType Relationship type (or list of types) to keep
   * @returns A new array of IDs, in relationship order. Empty if the block has no
   *    `Relationships` or none match.
   */
  relatedBlockIdsByRelType(relType: ApiRelationshipType | ApiRelationshipType[]): string[] {
    // Treat a single type as a one-element list so both cases share one membership check:
    const wantedTypes: ApiRelationshipType[] = Array.isArray(relType) ? relType : [relType];
    let matchedIds: string[] = [];
    (this._dict.Relationships || []).forEach((rel) => {
      if (wantedTypes.indexOf(rel.Type) < 0) return;
      matchedIds = matchedIds.concat(rel.Ids);
    });
    return matchedIds;
  }
}

/**
 * Parsed TRP object representing a document metadata descriptor from a Textract API result
 *
 * You'll usually create this via `TextractDocument`, `TextractExpense`, `TextractIdentity`
 * classes, etc - rather than directly.
 */
export class DocumentMetadata extends ApiObjectWrapper<ApiDocumentMetadata> {
  /**
   * Number of pages in the document, according to the Amazon Textract DocumentMetadata field
   *
   * Falls back to 0 when the wrapped object is null/undefined or its `Pages` value is falsy.
   */
  get nPages(): number {
    return this._dict?.Pages || 0;
  }
}

/**
 * Configuration options for iterating nested lists
 */
export interface INestedListOpts {
  /**
   * Include nested children (true) or top-level items only (false)
   *
   * No default is defined at this level: behaviour when omitted is up to the consuming method.
   */
  deep?: boolean;
}

/**
 * Build a re-usable iterable over a collection that is looked up lazily
 *
 * The argument is a *function returning* the collection, not the collection itself. It is called
 * each time a new iteration starts, so an iterable that outlives a mutation of its parent object
 * still reflects the parent's current state. For example:
 *
 * @example
 * const iterWords = line.iterWords(); // Implemented with getIterable(() => this._words)
 * const wordsBefore = [...iterWords];
 * line._words = [];
 * const wordsAfter = [...iterWords]; // Should return [] as expected
 *
 * @param collectionFetcher Callback returning the array to iterate over
 * @returns An iterable that can be traversed any number of times
 */
export function getIterable<T>(collectionFetcher: () => T[]): Iterable<T> {
  return {
    [Symbol.iterator](): Iterator<T> {
      // Fetch once per iteration; the array's length is still re-checked on every step
      const items = collectionFetcher();
      let cursor = 0;
      return {
        next(): IteratorResult<T> {
          if (cursor >= items.length) {
            return { done: true, value: undefined };
          }
          const value = items[cursor];
          cursor += 1;
          return { done: false, value };
        },
      };
    },
  };
}

/**
 * Configuration options for escaping text for HTML
 */
export interface IEscapeHtmlOpts {
  /**
   * Set true when the escaped text will sit inside an element attribute <el attr="...">
   *
   * Single and double quotes are only escaped in this mode: standard text nodes don't need it.
   * @default false
   */
  forAttr?: boolean;
}

/**
 * Escape a document text string for use in HTML (TextNodes only by default)
 *
 * Always replaces `&`, `<` and `>`. With `forAttr` set, `'` and `"` are replaced too.
 *
 * @param str Raw text to be escaped
 * @param opts Escaping options (see `IEscapeHtmlOpts`)
 * @returns Escaped string ready to be used in a HTML document
 */
export function escapeHtml(str: string, { forAttr = false }: IEscapeHtmlOpts = {}): string {
  const entityByChar: { [char: string]: string } = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    "'": "&#39;",
    '"': "&quot;",
  };
  // Quotes are only matched (and therefore only replaced) in attribute mode:
  const unsafeChars = forAttr ? /[&<>'"]/g : /[&<>]/g;
  return str.replace(unsafeChars, (char) => entityByChar[char] as string);
}

/**
 * Configuration options for indenting text
 */
export interface IIndentOpts {
  /**
   * The character/string that should be used to indent text.
   *
   * We default to 1x tab (rather than e.g. 2x spaces) to minimize token count for LLM use-cases
   *
   * @default "\t"
   */
  character?: string;
  /**
   * The number of times the indent `character` should be repeated.
   *
   * @default 1
   */
  count?: number;
  /**
   * Whether indentation should also be applied to empty & whitespace-only lines.
   * @default false
   */
  includeEmptyLines?: boolean;
  /**
   * Set true to skip the first line of text when applying indentation
   * @default false
   */
  skipFirstLine?: boolean;
}

/**
 * Indent all lines of `text` by a certain amount
 *
 * @param text The (possibly multi-line) text to indent
 * @param opts Indentation options (see `IIndentOpts`)
 * @returns A copy of `text` with `character` repeated `count` times inserted at the start of each
 *    line. Empty/whitespace-only lines are left untouched unless `includeEmptyLines` is set, and
 *    the first line is left untouched if `skipFirstLine` is set.
 */
export function indent(
  text: string,
  { character = "\t", count = 1, includeEmptyLines = false, skipFirstLine = false }: IIndentOpts = {},
): string {
  const prefix = character.repeat(count);
  // Zero-width match at the start of every line (or only of lines with non-whitespace content):
  const lineStarts = includeEmptyLines ? /^/gm : /^(?!\s*$)/gm;
  // A replacer *function* is used for two reasons:
  // - `offset === 0` identifies the first line, so `skipFirstLine` just declines to indent it.
  //   Indenting everything and then chopping the prefix off the front would corrupt text whose
  //   first line was blank and therefore never indented.
  // - The prefix is inserted literally, so `$` in `character` isn't treated as a special
  //   replacement pattern.
  return text.replace(lineStarts, (_match: string, offset: number) =>
    skipFirstLine && offset === 0 ? "" : prefix,
  );
}

/**
 * Statistical methods for aggregating multiple scores/numbers into one representative value
 *
 * Different use-cases may wish to use different aggregations: For example summarizing OCR
 * confidence for a whole page or region based on the individual words/lines.
 */
export const enum AggregationMethod {
  /** Geometric mean of the values */
  GeometricMean = "GEOMEAN",
  /** Largest value */
  Max = "MAX",
  /** Arithmetic mean of the values */
  Mean = "MEAN",
  /** Smallest value */
  Min = "MIN",
  /** Most frequently occurring value */
  Mode = "MODE",
}

/**
 * Find the value that occurs most often in an Iterable of numbers
 *
 * Occurrences are tallied in a plain object keyed by the number, then the tally with the highest
 * count is selected. On a tie, the first tally in the object's key enumeration order wins.
 *
 * @param arr Numbers to summarize
 * @returns The most common value, or null if `arr` was empty.
 */
export function modalAvg(arr: Iterable<number>): number | null {
  const tallies: { [key: number]: { value: number; freq: number } } = {};
  for (const num of arr) {
    const tally = tallies[num];
    if (tally) {
      tally.freq += 1;
    } else {
      tallies[num] = { value: num, freq: 1 };
    }
  }

  let mode: number | null = null;
  let topFreq = 0;
  for (const key in tallies) {
    const { value, freq } = tallies[key];
    // Strictly-greater comparison: later tallies never displace an equal earlier one
    if (freq <= topFreq) continue;
    topFreq = freq;
    mode = value;
  }
  return mode;
}

/**
 * Summarize an Iterable of numbers using a statistic of your choice
 *
 * If `arr` is empty, this function will return `null`.
 *
 * @param arr Numbers to summarize (consumed into an array if not already one)
 * @param aggMethod Which statistic to compute
 * @returns The aggregated value, or null if `arr` yielded no items
 * @throws If `aggMethod` is not one of the supported `AggregationMethod` values
 */
export function aggregate(arr: Iterable<number>, aggMethod: AggregationMethod): number | null {
  // Although some aggregations could process streaming-style, we'd need to implement zero-length
  // detection separately for each one. Therefore simplest method is just to extract arr as an
  // actual array up-front:
  const actualArr: Array<number> = Array.isArray(arr) ? arr : Array.from(arr);
  if (actualArr.length === 0) return null;

  if (aggMethod === AggregationMethod.GeometricMean) {
    // Performing arithmetic mean in logspace better numerically conditioned than just multiplying:
    return Math.exp(actualArr.reduce((acc, next) => acc + Math.log(next), 0) / actualArr.length);
  } else if (aggMethod === AggregationMethod.Max) {
    // Fold instead of `Math.max(...actualArr)`: spreading a very large array into call arguments
    // can exceed the engine's argument limit and throw a RangeError.
    return actualArr.reduce((acc, next) => Math.max(acc, next), -Infinity);
  } else if (aggMethod === AggregationMethod.Mean) {
    return actualArr.reduce((acc, next) => acc + next, 0) / actualArr.length;
  } else if (aggMethod === AggregationMethod.Min) {
    // Same reasoning as for Max above:
    return actualArr.reduce((acc, next) => Math.min(acc, next), Infinity);
  } else if (aggMethod === AggregationMethod.Mode) {
    return modalAvg(actualArr);
  } else {
    throw new Error(`Unsupported aggMethod '${aggMethod}' not in allowed AggregationMethod enum`);
  }
}

/**
 * Find the largest value in an array of numbers, and the first index at which it occurs
 *
 * If `arr` is empty or no elements are numeric, this function will return a value of `-Infinity`
 * and an index of `-1`.
 *
 * @param arr Numbers to search
 * @returns The maximum value found and the index of its first occurrence
 */
export function argMax(arr: number[]): { maxValue: number; maxIndex: number } {
  let maxValue = -Infinity;
  let maxIndex = -1;
  arr.forEach((candidate, ix) => {
    // Strictly-greater comparison keeps the *first* index for repeated maxima (and skips NaN)
    if (candidate > maxValue) {
      maxValue = candidate;
      maxIndex = ix;
    }
  });
  return { maxValue, maxIndex };
}

/**
 * Possible actions to take when encountering a missing Block referenced in results
 *
 * "error" throws an error, "warn" logs a warning, and falsy values silently ignore.
 *
 * (The type itself only admits `null` as the "ignore" value, but consumers in this module test
 * for falsiness rather than comparing against `null` specifically.)
 *
 * TODO: Could/should we introduce a callback option one day?
 */
export type ActionOnMissingBlock = "error" | "warn" | null;

/**
 * Possible actions to take when encountering an unexpected Block type in results
 *
 * "error" throws an error, "warn" logs a warning, and falsy values silently ignore.
 *
 * (The type itself only admits `null` as the "ignore" value, but consumers in this module test
 * for falsiness rather than comparing against `null` specifically.)
 *
 * TODO: Could/should we introduce a callback option one day?
 */
export type ActionOnUnexpectedBlockType = "error" | "warn" | null;

/**
 * Configuration options for filtering collections of Textract API "Block"s by type
 */
export interface IBlockTypeFilterOpts {
  /**
   * Only return API Blocks of the given type(s)
   *
   * By default, all blocks are returned unless otherwise documented.
   *
   * Accepts a single block type, an array, or a Set.
   */
  includeBlockTypes?: ApiBlockType | ApiBlockType[] | Set<ApiBlockType> | null;
  /**
   * Action to take on encountering a Block of unexpected BlockType
   *
   * Set "error" to throw an error, "warn" to log a warning, or falsy to skip silently.
   */
  onUnexpectedBlockType?: ActionOnUnexpectedBlockType;
  /**
   * Block types to silently skip/ignore in the results
   *
   * Unlike `includeBlockTypes`, this is typed to accept only an array or Set (not a single type).
   */
  skipBlockTypes?: ApiBlockType[] | Set<ApiBlockType> | null;
}

/**
 * Configuration options for handling data inconsistency in underlying Textract results
 */
export interface IMissingBlockOpts {
  /**
   * Action to take when a referenced Block ID cannot be found
   *
   * Set "error" to throw an error, "warn" to log a warning, or null/falsy to skip silently.
   */
  onMissingBlockId?: ActionOnMissingBlock;
}

/**
 * Normalize an optional Set-like or individual-object parameter to a Set
 *
 * - `null` and `undefined` both yield `null`
 * - An existing `Set` is returned as-is (same instance, not copied)
 * - An array yields a new `Set` of its elements
 * - Anything else is treated as a single member and wrapped in a new one-element `Set`
 *
 * @param raw The value to normalize
 * @returns The normalized Set, or null when no value was provided
 */
export function normalizeOptionalSet<T, TArg extends T | T[] | Set<T> | null | undefined>(
  raw: TArg,
): T extends null ? null : T extends undefined ? undefined : Set<T>;
export function normalizeOptionalSet<T>(raw: T | T[] | Set<T> | null | undefined): Set<T> | null {
  if (raw === null || raw === undefined) return null;
  if (raw instanceof Set) return raw;
  const members: T[] = Array.isArray(raw) ? raw : [raw];
  return new Set(members);
}

/**
 * Polyfill for Set.intersection() which is not available in all our target runtimes
 *
 * Neither input is modified. Entries of the result keep the order they have in `a`.
 *
 * See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Set/intersection
 *
 * @param a First set
 * @param b Second set
 * @returns A new Set holding the entries of `a` that are also present in `b`
 */
export function setIntersection<T>(a: Set<T>, b: Set<T>) {
  const shared = new Set<T>();
  a.forEach((entry) => {
    if (b.has(entry)) shared.add(entry);
  });
  return shared;
}

/**
 * Polyfill for Set.union() which is not available in all our target runtimes
 *
 * Neither input is modified. The result lists the entries of `a` first, followed by any entries
 * of `b` that were not already present.
 *
 * See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Set/union
 *
 * @param a First set
 * @param b Second set
 * @returns A new Set holding every entry found in `a` or `b`
 */
export function setUnion<T>(a: Set<T>, b: Set<T>) {
  const merged = new Set<T>(a);
  b.forEach((entry) => merged.add(entry));
  return merged;
}

/**
 * Interface for a (TextractDocument-like) object that can query Textract Blocks
 *
 * Unlike `IBlockManager` (below), implementers of `IDocBlocks` can only query underlying API Block
 * objects - and not their associated parsed TRP items.
 *
 * This interface is used to avoid circular references in child classes which need to reference some
 * TextractDocument-like parent, before the actual TextractDocument class is defined.
 */
export interface IDocBlocks {
  /**
   * Retrieve an underlying Amazon Textract API `Block` response object by its unique ID
   *
   * May return `undefined` - presumably when no block with the given ID is known (callers in this
   * module treat a falsy result as a missing block).
   */
  getBlockById: { (blockId: string): ApiBlock | undefined };
  /**
   * List all underlying Amazon Textract API `Block` objects managed by this parser
   */
  listBlocks: { (): ApiBlock[] };
}

/**
 * Interface for a (Page-like) object that can query Textract Blocks and their parsed wrapper items
 *
 * This interface extends `IDocBlocks` to also support looking up parsed TRP items by underlying
 * `Block` ID. It's used to avoid circular references in child classes which need to reference some
 * Page-like parent, before the actual Page class is defined.
 */
export interface IBlockManager extends IDocBlocks {
  /**
   * Return a parsed TRP.js object corresponding to an API Block
   *
   * The return value is *nearly* always some subtype of `ApiBlockWrapper`, except that for form
   * fields we return the overall `Field` object instead of the `FieldKey`.
   *
   * @param blockId Unique ID of the API Block for which a parsed object should be fetched
   * @param allowBlockTypes Optional restriction on acceptable ApiBlockType(s) to return
   * @throws If no parsed object exists for the block ID, or it doesn't match `allowBlockTypes`
   */
  getItemByBlockId(
    blockId: string,
    allowBlockTypes?: ApiBlockType | ApiBlockType[] | null,
  ): IApiBlockWrapper<ApiBlock>;
  /**
   * Register a newly parsed ApiBlockWrapper for a particular block ID
   *
   * In cases where a BlockManager devolves parsing certain block types down to an intermediate
   * layer (e.g. QueryInstance parsing related QueryResult blocks) - the lower parser should use
   * this function to register the created items with the block manager to allow later retrieval.
   *
   * @param blockId Unique ID of the API Block for which a parsed object should be registered
   * @param item The parsed wrapper object to register against `blockId`
   */
  registerParsedItem(blockId: string, item: IApiBlockWrapper<ApiBlock>): void;
}

/**
 * Interface for objects that track a reference to the Page on which they're defined
 *
 * `TPage` is the concrete page type, which must at least implement `IBlockManager`.
 */
export interface IWithParentPage<TPage extends IBlockManager> {
  /**
   * Parsed TRP.js `Page` that this object is a member of
   */
  parentPage: TPage;
}

/**
 * Interface for usually (API block wrapper) objects that can traverse related parsed objects
 *
 * TODO: Should we enforce/guarantee related items are also `I{Hosted?}ApiBlockWrapper`s?
 */
export interface IWithRelatedItems<TRelated extends IApiBlockWrapper<ApiBlock>> {
  /**
   * Iterate through directly related Blocks' parsed wrapper items, with optional filters
   *
   * This low-level method traverses the `Relationships` of the wrapped block, but looks up the
   * linked block IDs to return the actual parsed wrapper items for each target - since that's
   * usually what you'll want to work with anyway.
   *
   * @param relType Type(s) of relationships to consider
   * @param opts Options for filtering the returned items
   * @returns An iterable over the parsed wrapper items of the related blocks
   */
  iterRelatedItemsByRelType(
    relType: ApiRelationshipType | ApiRelationshipType[],
    opts?: IBlockTypeFilterOpts,
  ): Iterable<TRelated>;

  /**
   * List directly related Blocks' parsed wrapper items, with optional filters
   *
   * This low-level method traverses the `Relationships` of the wrapped block, but looks up the
   * linked block IDs to return the actual parsed wrapper items for each target - since that's
   * usually what you'll want to work with anyway.
   *
   * @param relType Type(s) of relationships to consider
   * @param opts Options for filtering the returned items
   * @returns An array of the parsed wrapper items of the related blocks
   */
  listRelatedItemsByRelType(
    relType: ApiRelationshipType | ApiRelationshipType[],
    opts?: IBlockTypeFilterOpts,
  ): TRelated[];
}

/**
 * INTERNAL shared implementation for iterRelatedBlocksByRelType
 *
 * Iterate through Blocks related to `me` by a given `relType`, optionally filtering related blocks
 * by type.
 *
 * The filter options are normalized once, when this function is called. The related block IDs are
 * read from `me` each time a new iterator is requested, and the blocks themselves are looked up
 * lazily as the iterator advances - so missing/unexpected block errors surface during iteration.
 *
 * @param relType The type of relationship to iterate through
 * @param opts Filtering options and unexpected/missing block behaviour config. Missing block IDs
 *    throw by default (`onMissingBlockId = "error"`); blocks not in `includeBlockTypes` are
 *    silently skipped by default (`onUnexpectedBlockType = null`).
 * @param me Block wrapper from which relations should be traversed
 * @param host IDocBlocks that can be used to look up related blocks
 * @returns A re-usable iterable over the related API Blocks that pass the filters
 * @throws (During iteration) if a block is missing or of unexpected type and the corresponding
 *    action option is neither "warn" nor falsy
 */
export function _implIterRelatedBlocksByRelType(
  relType: ApiRelationshipType | ApiRelationshipType[],
  {
    includeBlockTypes = null,
    onMissingBlockId = "error",
    onUnexpectedBlockType = null,
    skipBlockTypes = null,
  }: IBlockTypeFilterOpts & IMissingBlockOpts = {},
  me: IApiBlockWrapper<ApiBlock>,
  host: IDocBlocks,
): Iterable<ApiBlock> {
  // Normalize optional set parameters:
  includeBlockTypes = normalizeOptionalSet(includeBlockTypes);
  skipBlockTypes = normalizeOptionalSet(skipBlockTypes);

  const getIterator = (): Iterator<ApiBlock> => {
    // Snapshot the related IDs at the start of each iteration pass:
    const blockIds = me.relatedBlockIdsByRelType(relType);
    let ixItem = 0;
    return {
      next: (): IteratorResult<ApiBlock> => {
        // Keep consuming IDs until one yields a block that passes all the filters:
        while (ixItem < blockIds.length) {
          const blockId = blockIds[ixItem++];
          // TODO: Directly support IBlockTypeFilterOpts in getItemByBlockId maybe?
          const block = host.getBlockById(blockId);
          if (!block) {
            // Referenced block doesn't exist: ignore, warn, or throw per onMissingBlockId
            if (!onMissingBlockId) continue;
            const msg = `(While iterating ${relType} relations of parent ${me.id}) Referenced block ID ${blockId} not found!`;
            if (onMissingBlockId === "warn") {
              console.warn(msg);
              continue;
            } else {
              throw new Error(msg);
            }
          }
          // Skipped types are dropped silently, *before* the include check - so they never
          // trigger the onUnexpectedBlockType action:
          if (skipBlockTypes && skipBlockTypes.has(block.BlockType)) continue;
          if (includeBlockTypes && !includeBlockTypes.has(block.BlockType)) {
            // Block type not in the include list: ignore, warn, or throw per onUnexpectedBlockType
            if (!onUnexpectedBlockType) continue;
            const msg = `(While iterating ${relType} relations of parent ${me.id}) Found unexpected block ID ${blockId} of type ${block.BlockType} not in set ${Array.from(includeBlockTypes)}`;
            if (onUnexpectedBlockType === "warn") {
              console.warn(msg);
              continue;
            } else {
              throw new Error(msg);
            }
          }
          return {
            done: false,
            value: block,
          };
        }
        // Ran out of related IDs:
        return {
          done: true,
          value: undefined,
        };
      },
    };
  };
  return {
    [Symbol.iterator]: getIterator,
  };
}

/**
 * INTERNAL shared implementation for iterRelatedItemsByRelType
 *
 * Most block wrappers will inherit this method via `PageHostedApiBlockWrapper`, but `Page` itself
 * cannot: Because currently `Page` is the block manager for its own block. This layer lets us
 * provide the `IWithRelatedItems` interface on `Page` without duplication.
 *
 * Filtering and missing/unexpected block handling are delegated to
 * `_implIterRelatedBlocksByRelType`: this function only maps each yielded Block to its parsed item.
 *
 * TODO: Maybe we should make Page more like a regular block, and TextractDocument the manager?
 *
 * @param relType The type of relationship to iterate through
 * @param opts Filtering options and unexpected/missing block behaviour config
 * @param me Block wrapper from which relations should be traversed
 * @param host IBlockManager that can be used to look up related blocks + parsed items
 * @returns A re-usable iterable over the parsed items for the related blocks that pass the filters
 */
export function _implIterRelatedItemsByRelType(
  relType: ApiRelationshipType | ApiRelationshipType[],
  opts: IBlockTypeFilterOpts & IMissingBlockOpts = {},
  me: IApiBlockWrapper<ApiBlock>,
  host: IBlockManager,
): Iterable<IApiBlockWrapper<ApiBlock>> {
  const baseIterable = _implIterRelatedBlocksByRelType(relType, opts, me, host);
  const getIterator = (): Iterator<IApiBlockWrapper<ApiBlock>> => {
    // Each iteration pass over the result starts a fresh pass over the underlying blocks:
    const baseIterator = baseIterable[Symbol.iterator]();
    return {
      next: (): IteratorResult<IApiBlockWrapper<ApiBlock>> => {
        const nextResult = baseIterator.next();
        if (nextResult.done) return nextResult;
        return {
          done: false,
          // The block's own type is passed as the allowed type for the parsed item lookup:
          value: host.getItemByBlockId(nextResult.value.Id, nextResult.value.BlockType),
        };
      },
    };
  };
  return {
    [Symbol.iterator]: getIterator,
  };
}

/**
 * INTERNAL shared implementation for listRelatedBlocksByRelType
 *
 * List Blocks related to `me` by a given `relType`, optionally filtering related blocks by type.
 *
 * Processing happens in phases: first every related block ID is looked up (handling any missing
 * blocks), then `skipBlockTypes` are removed, then `includeBlockTypes` is enforced. Any
 * missing-block warnings/errors are therefore raised before any unexpected-type ones.
 *
 * @param relType The type of relationship to list targets for
 * @param opts Filtering options and unexpected/missing block behaviour config. Missing block IDs
 *    throw by default (`onMissingBlockId = "error"`); blocks not in `includeBlockTypes` are
 *    silently dropped by default (`onUnexpectedBlockType = null`).
 * @param me Block wrapper from which relations should be traversed
 * @param host IDocBlocks that can be used to look up related blocks
 * @returns A new array of the related API Blocks that pass the filters
 * @throws If a block is missing or of unexpected type and the corresponding action option is
 *    neither "warn" nor falsy
 */
export function _implListRelatedBlocksByRelType(
  relType: ApiRelationshipType | ApiRelationshipType[],
  {
    includeBlockTypes = null,
    onMissingBlockId = "error",
    onUnexpectedBlockType = null,
    skipBlockTypes = null,
  }: IBlockTypeFilterOpts & IMissingBlockOpts = {},
  me: IApiBlockWrapper<ApiBlock>,
  host: IDocBlocks,
): ApiBlock[] {
  // Normalize optional set parameters:
  includeBlockTypes = normalizeOptionalSet(includeBlockTypes);
  skipBlockTypes = normalizeOptionalSet(skipBlockTypes);

  // Retrieve initial list & check blocks all exist:
  let blocks: ApiBlock[] = [];
  for (const blockId of me.relatedBlockIdsByRelType(relType)) {
    const block = host.getBlockById(blockId);
    if (!block) {
      // Referenced block doesn't exist: ignore, warn, or throw per onMissingBlockId
      if (!onMissingBlockId) continue;
      const msg = `(While iterating ${relType} relations of parent ${me.id}) Referenced block ID ${blockId} not found!`;
      if (onMissingBlockId === "warn") {
        console.warn(msg);
        continue;
      } else {
        throw new Error(msg);
      }
    }
    blocks.push(block);
  }

  // Apply block type filters:
  // (Skipped types are removed first, so they never trigger the onUnexpectedBlockType action)
  if (skipBlockTypes) blocks = blocks.filter((block) => !skipBlockTypes.has(block.BlockType));
  if (includeBlockTypes) {
    blocks = blocks.filter((block) => {
      if (includeBlockTypes.has(block.BlockType)) return true;
      if (!onUnexpectedBlockType) return false;
      // Otherwise need to take action on this unexpected block:
      const msg = `(While iterating ${relType} relations of parent ${me.id}) Found unexpected block ID ${block.Id} of type ${block.BlockType} not in set ${Array.from(includeBlockTypes)}`;
      if (onUnexpectedBlockType === "warn") {
        console.warn(msg);
        return false;
      } else {
        throw new Error(msg);
      }
    });
  }
  return blocks;
}

/**
 * INTERNAL shared implementation for listRelatedItemsByRelType
 *
 * Most block wrappers will inherit this method via `PageHostedApiBlockWrapper`, but `Page` itself
 * cannot: Because currently `Page` is the block manager for its own block. This layer lets us
 * provide the `IWithRelatedItems` interface on `Page` without duplication.
 *
 * Filtering and missing/unexpected block handling are delegated to
 * `_implListRelatedBlocksByRelType`: this function only maps each listed Block to its parsed item.
 *
 * TODO: Maybe we should make Page more like a regular block, and TextractDocument the manager?
 *
 * @param relType The type of relationship to list targets for
 * @param opts Filtering options and unexpected/missing block behaviour config
 * @param me Block wrapper from which relations should be traversed
 * @param host IBlockManager that can be used to look up related blocks + parsed items
 * @returns A new array of the parsed items for the related blocks that pass the filters
 */
export function _implListRelatedItemsByRelType(
  relType: ApiRelationshipType | ApiRelationshipType[],
  opts: IBlockTypeFilterOpts & IMissingBlockOpts = {},
  me: IApiBlockWrapper<ApiBlock>,
  host: IBlockManager,
): IApiBlockWrapper<ApiBlock>[] {
  // The block's own type is passed as the allowed type for each parsed item lookup:
  return _implListRelatedBlocksByRelType(relType, opts, me, host).map((block) =>
    host.getItemByBlockId(block.Id, block.BlockType),
  );
}

/**
 * Base interface for classes which wrap over a Textract API `Block` *and* are parent doc/page-aware
 *
 * Holding a reference to the hosting page/document allows direct lookup of related parsed objects.
 *
 * Combines the members of the `ApiBlockWrapper` class with `IWithParentPage` and
 * `IWithRelatedItems`, and adds no members of its own.
 */
export interface IHostedApiBlockWrapper<TBlock extends ApiBlock, TPage extends IBlockManager>
  extends ApiBlockWrapper<TBlock>,
    IWithParentPage<TPage>,
    IWithRelatedItems<IApiBlockWrapper<ApiBlock>> {}

/**
 * Base class for an item parser wrapping Textract `Block` object, that tracks its parent page
 *
 * Items derived from this base automatically register themselves with the parent page on construct
 */
export class PageHostedApiBlockWrapper<TBlock extends ApiBlock, TPage extends IBlockManager>
  extends ApiBlockWrapper<TBlock>
  implements IHostedApiBlockWrapper<TBlock, TPage>
{
  // Page (block manager) that hosts this item, as passed to the constructor
  _parentPage: TPage;

  /**
   * @param dict Underlying API `Block` to wrap
   * @param parentPage Page hosting this block. The new item is registered with it under the
   *    block's `Id`.
   */
  constructor(dict: TBlock, parentPage: TPage) {
    super(dict);
    this._parentPage = parentPage;
    // Self-register so the page can later return this item from lookups by block ID:
    parentPage.registerParsedItem(dict.Id, this);
  }

  /**
   * Parsed TRP.js `Page` that this object is a member of
   */
  get parentPage(): TPage {
    return this._parentPage;
  }

  /**
   * Iterate through directly related Blocks' parsed wrapper items, with optional filters
   *
   * Related blocks and their parsed items are looked up via `parentPage`.
   *
   * @param relType Type(s) of relationships to consider
   * @param opts Options for filtering the returned items
   */
  iterRelatedItemsByRelType(
    relType: ApiRelationshipType | ApiRelationshipType[],
    opts: IBlockTypeFilterOpts = {},
  ): Iterable<IApiBlockWrapper<ApiBlock>> {
    return _implIterRelatedItemsByRelType(relType, opts, this, this.parentPage);
  }

  /**
   * List directly related Blocks' parsed wrapper items, with optional filters
   *
   * Related blocks and their parsed items are looked up via `parentPage`.
   *
   * @param relType Type(s) of relationships to consider
   * @param opts Options for filtering the returned items
   */
  listRelatedItemsByRelType(
    relType: ApiRelationshipType | ApiRelationshipType[],
    opts: IBlockTypeFilterOpts = {},
  ): IApiBlockWrapper<ApiBlock>[] {
    return _implListRelatedItemsByRelType(relType, opts, this, this.parentPage);
  }
}


================================================
FILE: src-js/src/content.ts
================================================
/**
 * TRP classes for (generic document) low-level content objects
 */

// Local Dependencies:
import { ApiBlockType, ApiRelationshipType } from "./api-models/base";
import {
  ApiLineBlock,
  ApiSelectionElementBlock,
  ApiSelectionStatus,
  ApiSignatureBlock,
  ApiTextType,
  ApiWordBlock,
} from "./api-models/content";
import { ApiBlock } from "./api-models/document";
import {
  ActionOnUnexpectedBlockType,
  ApiBlockWrapper,
  Constructor,
  doesFilterAllowBlockType,
  escapeHtml,
  IApiBlockWrapper,
  IBlockManager,
  IBlockTypeFilterOpts,
  IHostedApiBlockWrapper,
  IRenderable,
  IRenderOpts,
  IWithParentPage,
  IWithText,
  normalizeOptionalSet,
  PageHostedApiBlockWrapper,
  setIntersection,
  setUnion,
} from "./base";
import { Geometry, IWithGeometry } from "./geometry";

/**
 * Parsed TRP.js representation of a single word of text
 *
 * Thin wrapper over an Amazon Textract `WORD` block from the underlying API response.
 */
export class Word
  extends ApiBlockWrapper<ApiWordBlock>
  implements IRenderable, IWithGeometry<ApiWordBlock, Word>
{
  _geometry: Geometry<ApiWordBlock, Word>;

  /**
   * @param block The API `WORD` block to wrap
   */
  constructor(block: ApiWordBlock) {
    super(block);
    const { Geometry: apiGeometry } = block;
    this._geometry = new Geometry(apiGeometry, this);
  }

  /**
   * OCR model confidence (0-100) in the extracted text of this word
   *
   * Reads from, and writes through to, the wrapped block's `Confidence` field.
   */
  get confidence(): number {
    const { Confidence } = this._dict;
    return Confidence;
  }
  set confidence(newVal: number) {
    this._dict.Confidence = newVal;
  }
  /**
   * Location of the word on the input image / page
   */
  get geometry(): Geometry<ApiWordBlock, Word> {
    return this._geometry;
  }
  /**
   * The text the OCR model extracted for this word
   */
  get text(): string {
    const { Text } = this._dict;
    return Text;
  }
  /**
   * Whether the text looks hand-written or computer-generated
   *
   * Reads from, and writes through to, the wrapped block's `TextType` field.
   */
  get textType(): ApiTextType {
    const { TextType } = this._dict;
    return TextType;
  }
  set textType(newVal: ApiTextType) {
    this._dict.TextType = newVal;
  }

  /**
   * Semantic HTML for a `Word`: its HTML-escaped text
   *
   * @param opts Optional render options; if they filter out this block's type, the result is ""
   */
  html(opts?: IRenderOpts): string {
    const isAllowed = doesFilterAllowBlockType(opts, this.blockType);
    return isAllowed ? escapeHtml(this.text) : "";
  }

  /**
   * Human-readable string form of a `Word`: simply its `.text`
   */
  str(): string {
    const { text } = this;
    return text;
  }
}

/**
 * Interface for objects that have child items representing actual "content"
 *
 * Typically used for containers of low-level content items like `Word`s and `SelectionElement`s.
 * In some cases (like Layout), higher-level containers (of items like `LINE`) may use the same
 * interface.
 *
 * For objects guaranteed to contain only `Word`s (no `SelectionElement`s), prefer `IWithWords`
 * instead.
 *
 * @typeParam TContent Type of the parsed content items exposed by this object
 */
export interface IWithContent<TContent extends IApiBlockWrapper<ApiBlock> & IRenderable> extends IWithText {
  /**
   * Number of content items in this object
   */
  get nContentItems(): number;
  /**
   * Return the text content of this element, with additional options
   *
   * Unlike the plain `.text` property, this method supports filtering which block types are
   * included and controlling behaviour when unexpected ones are encountered.
   *
   * @param opts Optional configuration for filtering rendering to certain content types
   * @returns The text of this element after applying the filters in `opts`
   */
  getText(opts?: IBlockTypeFilterOpts): string;
  /**
   * Iterate through the Content items in this object
   *
   * Optionally filter certain content Block types by specifying `opts`
   *
   * @param opts Optional configuration for filtering to certain content Block types
   *
   * @example
   * for (const item of cell.iterContent()) {
   *   console.log(item.text);
   * }
   * @example
   * [...cell.iterContent({ skipBlockTypes: [ApiBlockType.SelectionElement] })].forEach(
   *   (item) => console.log(item.text)
   * );
   */
  iterContent(opts?: IBlockTypeFilterOpts): Iterable<TContent>;
  /**
   * List the Content items in this object
   *
   * Optionally filter certain content Block types by specifying `opts`
   *
   * @param opts Optional configuration for filtering to certain content Block types
   */
  listContent(opts?: IBlockTypeFilterOpts): Array<TContent>;
}

/**
 * Configuration options for WithContent mixin (see `buildWithContent`)
 *
 * Every field is optional; `buildWithContent` supplies a default for each one that is omitted.
 */
export interface IWithContentMixinOptions {
  /**
   * What types of direct Child Block to consider as "content"
   *
   * Defaults to [SELECTION_ELEMENT, SIGNATURE, WORD] as per `buildWithContent`
   */
  contentTypes?: ApiBlockType[];

  /**
   * Action to take on encountering a child Block of unexpected BlockType
   *
   * Set "error" to throw an error, "warn" to log a warning, or falsy to skip silently.
   *
   * This is the mixin-level default: `buildWithContent` uses it whenever a call to
   * `iterContent()`/`listContent()` does not provide its own `onUnexpectedBlockType`.
   */
  onUnexpectedBlockType?: ActionOnUnexpectedBlockType;

  /**
   * Other types of direct child block that are expected but non-content
   *
   * This is optional to specify, but setting it up will provide more useful behaviour when using
   * strict `onUnexpectedBlockType` settings.
   */
  otherExpectedChildTypes?: ApiBlockType[] | null;
}

/**
 * Mixin factory for elements that have child Content (such as `Word`s and/or `SelectionElement`s)
 *
 * While it's possible to apply a TS mixin to a generic base class (with expressions like
 * `extends MyMixin(BaseClass)<TBaseArgs...>`), mixins cannot alter the base class' constructor
 * signature so they can't introduce additional type arguments (generic aspects) of their own. This
 * double-call mixin factory pattern provides a workaround *only* for cases where we're able to
 * specify the mixin type arguments at the point it's applied: Enabling a somewhat generic
 * definition of "content" that consumer classes can dictate.
 *
 * For objects guaranteed to contain only `Word` items, prefer `WithWords` instead.
 *
 * See: https://stackoverflow.com/a/48492205/13352657
 *
 * @typeParam TContent Type of the parsed content items the resulting mixin will expose
 * @param options Mixin configuration (see `IWithContentMixinOptions`), with fields:
 *    - `contentTypes`: API block types to be included when listing child "Content". Defaults to
 *      `[SelectionElement, Signature, Word]`. Set `[]` to disable this filter and preserve all
 *      items
 *    - `onUnexpectedBlockType`: Default action when a call does not specify its own (default
 *      `null`)
 *    - `otherExpectedChildTypes`: Non-content child block types that are always added to the
 *      skip list (default `null`, meaning none)
 * @returns A `WithContent(SuperClass)` mixin function that extends `SuperClass` with an
 *    `IWithContent<TContent>` implementation
 */
export function buildWithContent<TContent extends IApiBlockWrapper<ApiBlock> & IRenderable>({
  contentTypes = [ApiBlockType.SelectionElement, ApiBlockType.Signature, ApiBlockType.Word],
  onUnexpectedBlockType = null,
  otherExpectedChildTypes = null,
}: IWithContentMixinOptions = {}) {
  // (contentTypes cannot be `undefined` or null because of the default value)
  const contentTypesSet: Set<ApiBlockType> = new Set(contentTypes);
  const defaultOnUnexpected = onUnexpectedBlockType; // Need to rename because it'll be shadowed
  const otherTypesSet: Set<ApiBlockType> = otherExpectedChildTypes
    ? new Set(otherExpectedChildTypes)
    : new Set();

  /**
   * Merge per-call filter options with this mixin's configuration
   *
   * Shared by `iterContent()` and `listContent()` so both apply exactly the same rules:
   * - caller `includeBlockTypes` are intersected with the configured content types (a caller can
   *   narrow what counts as content, never widen it); if absent, the content types are used
   * - caller `skipBlockTypes` are unioned with the configured "other expected" child types; if
   *   absent, only those other expected types are skipped
   * - `onUnexpectedBlockType` falls back to the mixin-level default
   */
  const resolveChildFilter = ({
    includeBlockTypes = null,
    onUnexpectedBlockType = defaultOnUnexpected,
    skipBlockTypes = null,
  }: IBlockTypeFilterOpts = {}) => {
    if (includeBlockTypes) {
      includeBlockTypes = normalizeOptionalSet(includeBlockTypes);
      includeBlockTypes = setIntersection(contentTypesSet, includeBlockTypes);
    } else {
      includeBlockTypes = contentTypesSet;
    }
    if (skipBlockTypes) {
      skipBlockTypes = normalizeOptionalSet(skipBlockTypes);
      skipBlockTypes = setUnion(skipBlockTypes, otherTypesSet);
    } else {
      skipBlockTypes = otherTypesSet;
    }
    return { includeBlockTypes, onUnexpectedBlockType, skipBlockTypes };
  };

  /**
   * TypeScript mixin for a container `Block` wrapper whose `Child` blocks are actual content
   *
   * (For example a LINE of text, or a CELL/MERGED_CELL)
   *
   * @param SuperClass Base class to be extended with the mixin properties
   */
  return function WithContent<
    TBlock extends ApiBlock,
    TPage extends IBlockManager,
    T extends Constructor<IHostedApiBlockWrapper<TBlock, TPage>>,
  >(SuperClass: T) {
    return class extends SuperClass implements IWithContent<TContent> {
      /**
       * Text of the child content items, joined by single spaces
       *
       * Returns "" when `opts` filters out this container's own block type.
       *
       * @param opts Optional block type filters, also applied when listing the content
       */
      getText(opts?: IBlockTypeFilterOpts) {
        if (!doesFilterAllowBlockType(opts, this.blockType)) return "";
        return this.listContent(opts)
          .map((c) => c.text)
          .join(" ");
      }

      /**
       * Iterate the `Child`-related items of this block that count as content
       *
       * @param opts Optional per-call filters, merged with the mixin configuration
       */
      iterContent(opts?: IBlockTypeFilterOpts): Iterable<TContent> {
        return this.iterRelatedItemsByRelType(
          ApiRelationshipType.Child,
          resolveChildFilter(opts),
        ) as Iterable<TContent>;
      }

      /**
       * List the `Child`-related items of this block that count as content
       *
       * @param opts Optional per-call filters, merged with the mixin configuration
       */
      listContent(opts?: IBlockTypeFilterOpts): Array<TContent> {
        return this.listRelatedItemsByRelType(
          ApiRelationshipType.Child,
          resolveChildFilter(opts),
        ) as TContent[];
      }

      /**
       * Number of content items, using the default (mixin-level) filters
       */
      get nContentItems(): number {
        return this.listContent().length;
      }

      /**
       * A default text representation getter that concatenates child content separated by spaces
       *
       * Objects (like `Line`) that define their own representation of the overall text or need to
       * join content with something other than a space (like a newline) should override this.
       */
      get text(): string {
        return this.getText();
      }
    };
  };
}

/**
 * Configuration options for WithWords mixin
 *
 * Both fields are optional; `WithWords` defaults each of them to `null` when omitted.
 */
export interface IWithWordsMixinOptions {
  /**
   * Action to take on encountering a child Block of unexpected BlockType
   *
   * Set "error" to throw an error, "warn" to log a warning, or falsy to skip silently.
   *
   * This is the mixin-level default: `WithWords` uses it whenever a call to
   * `iterWords()`/`listWords()` does not provide its own `onUnexpectedBlockType`.
   */
  onUnexpectedBlockType?: ActionOnUnexpectedBlockType;

  /**
   * Other types of direct child block that are expected but non-content
   *
   * This is optional to specify, but setting it up will provide more useful behaviour when using
   * strict `onUnexpectedBlockType` settings.
   */
  otherExpectedChildTypes?: ApiBlockType[] | null;
}

/**
 * Interface for objects that have child `Word`s (such as LINEs of text)
 *
 * For objects that might contain `SelectionElement`s as well (such as table cells), use
 * `IWithContent` instead.
 */
export interface IWithWords extends IWithText {
  /**
   * Number of text Words in this object
   */
  get nWords(): number;
  /**
   * Return the text content of this element, with additional options
   *
   * Unlike the plain `.text` property, this method supports controlling error behaviour when
   * non-WORD blocks are encountered.
   *
   * @param opts Optional configuration for filtering rendering to certain content types
   * @returns The text of this element after applying the filters in `opts`
   */
  getText(opts?: IBlockTypeFilterOpts): string;
  /**
   * Iterate through the text `Word` items in this object
   *
   * Optionally control what happens when unexpected (non-Word) block types are encountered, by
   * specifying filter `opts`.
   *
   * @param opts Optional block type filter / unexpected-block handling configuration
   *
   * @example
   * for (const word of line.iterWords()) {
   *   console.log(word.text);
   * }
   * @example
   * [...line.iterWords()].forEach(
   *   (word) => console.log(word.text)
   * );
   */
  iterWords(opts?: IBlockTypeFilterOpts): Iterable<Word>;
  /**
   * List the text `Word`s in this object
   *
   * Optionally control what happens when unexpected (non-Word) block types are encountered, by
   * specifying filter `opts`.
   *
   * @param opts Optional block type filter / unexpected-block handling configuration
   */
  listWords(opts?: IBlockTypeFilterOpts): Word[];
  /**
   * Fetch a particular text `Word` in this object by index from 0 to `.nWords - 1`
   * @param ix 0-based index in the word list
   * @returns The `Word` at position `ix`
   * @throws if the index is out of bounds
   */
  wordAtIndex(ix: number): Word;
}

/**
 * Mixin for page-hosted API block wrappers with CHILD relations to WORD objects
 *
 * Adds dynamic functionality to list and traverse the Word objects contained in the content, and a
 * basic implementation for getting the overall `.text`.
 *
 * @param SuperClass The class to extend
 * @param opts Configuration options for how to handle other (non-WORD) child blocks
 * @returns A class extending `SuperClass` with an `IWithWords` implementation
 */
export function WithWords<
  TBlock extends ApiBlock,
  TPage extends IBlockManager,
  T extends Constructor<IHostedApiBlockWrapper<TBlock, TPage> & IWithParentPage<TPage>>,
>(
  SuperClass: T,
  { onUnexpectedBlockType = null, otherExpectedChildTypes = null }: IWithWordsMixinOptions = {},
) {
  const contentTypesSet = new Set([ApiBlockType.Word]);
  const defaultOnUnexpected = onUnexpectedBlockType; // Need to rename because it'll be shadowed
  const otherTypesSet: Set<ApiBlockType> = otherExpectedChildTypes
    ? new Set(otherExpectedChildTypes)
    : new Set();

  /**
   * Merge per-call filter options with this mixin's configuration
   *
   * Shared by `iterWords()` and `listWords()` so both apply exactly the same rules:
   * - caller `includeBlockTypes` are intersected with `{WORD}` (a caller can never widen the
   *   result beyond Words); if absent, only WORD blocks are included
   * - caller `skipBlockTypes` are unioned with the configured "other expected" child types; if
   *   absent, only those other expected types are skipped
   * - `onUnexpectedBlockType` falls back to the mixin-level default
   */
  const resolveChildFilter = ({
    includeBlockTypes = null,
    onUnexpectedBlockType = defaultOnUnexpected,
    skipBlockTypes = null,
  }: IBlockTypeFilterOpts = {}) => {
    if (includeBlockTypes) {
      includeBlockTypes = normalizeOptionalSet(includeBlockTypes);
      includeBlockTypes = setIntersection(contentTypesSet, includeBlockTypes);
    } else {
      includeBlockTypes = contentTypesSet;
    }
    if (skipBlockTypes) {
      skipBlockTypes = normalizeOptionalSet(skipBlockTypes);
      skipBlockTypes = setUnion(skipBlockTypes, otherTypesSet);
    } else {
      skipBlockTypes = otherTypesSet;
    }
    return { includeBlockTypes, onUnexpectedBlockType, skipBlockTypes };
  };

  return class extends SuperClass implements IWithWords {
    /**
     * Text of the child `Word`s, joined by single spaces
     *
     * Returns "" when `opts` filters out this container's own block type.
     *
     * @param opts Optional block type filters, also applied when listing the words
     */
    getText(opts?: IBlockTypeFilterOpts) {
      if (!doesFilterAllowBlockType(opts, this.blockType)) return "";
      return this.listWords(opts)
        .map((w) => w.text)
        .join(" ");
    }

    /**
     * Iterate the `Child`-related WORD items of this block
     *
     * @param opts Optional per-call filters, merged with the mixin configuration
     */
    iterWords(opts?: IBlockTypeFilterOpts): Iterable<Word> {
      return this.iterRelatedItemsByRelType(
        ApiRelationshipType.Child,
        resolveChildFilter(opts),
      ) as Iterable<Word>;
    }

    /**
     * List the `Child`-related WORD items of this block
     *
     * @param opts Optional per-call filters, merged with the mixin configuration
     */
    listWords(opts?: IBlockTypeFilterOpts): Word[] {
      return this.listRelatedItemsByRelType(
        ApiRelationshipType.Child,
        resolveChildFilter(opts),
      ) as Word[];
    }

    /**
     * Number of `Word`s, using the default (mixin-level) filters
     */
    get nWords(): number {
      return this.listWords().length;
    }

    /**
     * A default text representation getter that concatenates child `Word`s separated by spaces
     *
     * Objects (like `Line`) that define their own representation of the overall text or need to
     * join words with something other than a space (like a newline) should override this.
     */
    get text(): string {
      return this.getText();
    }

    /**
     * Fetch the `Word` at a 0-based position by walking `iterWords()`
     *
     * @param ix 0-based index of the word to fetch
     * @throws if `ix` is negative, or no word is found at that position
     */
    wordAtIndex(ix: number): Word {
      if (ix < 0) throw new Error(`Word index ${ix} must be >=0`);
      let ixCurr = 0;
      for (const word of this.iterWords()) {
        if (ixCurr === ix) return word;
        ++ixCurr;
      }
      // Loop exhausted without a match: ixCurr now equals the total number of words seen
      throw new Error(`Word index ${ix} out of bounds for length ${ixCurr}`);
    }
  };
}

/**
 * Generic base class for a Line, as the parent Page is not defined here.
 *
 * If you're consuming this library, you probably just want to use `document.ts/Line`.
 */
export class LineGeneric<TPage extends IBlockManager>
  extends WithW
Download .txt
gitextract_tsv6824f/

├── .flake8
├── .github/
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── workflows/
│       └── test_pull_request.yml
├── .idea/
│   ├── amazon-textract-response-parser.iml
│   ├── inspectionProfiles/
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── src-csharp/
│   ├── LICENSE
│   ├── Program.cs
│   ├── README.md
│   ├── TextractExtensions.cs
│   ├── appsettings.json
│   └── parser.csproj
├── src-js/
│   ├── .eslintrc.js
│   ├── .nvmrc
│   ├── .prettierrc.js
│   ├── CHANGELOG.md
│   ├── README.md
│   ├── bin/
│   │   └── reading-order-diagnostic.js
│   ├── examples/
│   │   ├── README.md
│   │   ├── browser-iife/
│   │   │   ├── main.html
│   │   │   ├── main.js
│   │   │   ├── package.json
│   │   │   └── test.js
│   │   ├── nodejs-import/
│   │   │   ├── main.js
│   │   │   └── package.json
│   │   ├── nodejs-require/
│   │   │   ├── main.js
│   │   │   └── package.json
│   │   └── nodejs-typescript/
│   │       ├── package.json
│   │       ├── src/
│   │       │   └── main.ts
│   │       └── tsconfig.json
│   ├── jest.config.js
│   ├── package.json
│   ├── rollup.config.mjs
│   ├── src/
│   │   ├── api-models/
│   │   │   ├── base.ts
│   │   │   ├── content.ts
│   │   │   ├── document.ts
│   │   │   ├── expense.ts
│   │   │   ├── form.ts
│   │   │   ├── geometry.ts
│   │   │   ├── id.ts
│   │   │   ├── index.ts
│   │   │   ├── layout.ts
│   │   │   ├── query.ts
│   │   │   ├── response.ts
│   │   │   └── table.ts
│   │   ├── base.ts
│   │   ├── content.ts
│   │   ├── document.ts
│   │   ├── expense.ts
│   │   ├── form.ts
│   │   ├── geometry.ts
│   │   ├── id.ts
│   │   ├── index.ts
│   │   ├── layout.ts
│   │   ├── query.ts
│   │   └── table.ts
│   ├── test/
│   │   ├── data/
│   │   │   ├── analyzeid-test-drivers-license-response.json
│   │   │   ├── analyzeid-test-passport-response.json
│   │   │   ├── expense-missing-geoms-response.json
│   │   │   ├── financial-document-response.json
│   │   │   ├── form1005-response.json
│   │   │   ├── invoice-expense-response.json
│   │   │   ├── paystub-response.json
│   │   │   ├── table-example-response.json
│   │   │   ├── test-failed-response.json
│   │   │   ├── test-inprogress-response.json
│   │   │   ├── test-multicol-response-2.json
│   │   │   ├── test-multicol-response.json
│   │   │   ├── test-query-response.json
│   │   │   ├── test-response.json
│   │   │   └── test-twocol-header-footer-response.json
│   │   ├── integ/
│   │   │   └── aws-sdk.test.ts
│   │   ├── tsconfig.json
│   │   └── unit/
│   │       ├── api-models.test.ts
│   │       ├── base.test.ts
│   │       ├── content.test.ts
│   │       ├── corpus/
│   │       │   ├── header-footer.test.ts
│   │       │   └── reading-order.test.ts
│   │       ├── document.test.ts
│   │       ├── expense.test.ts
│   │       ├── form.test.ts
│   │       ├── geometry.test.ts
│   │       ├── id.test.ts
│   │       ├── index.test.ts
│   │       ├── layout.test.ts
│   │       ├── query.test.ts
│   │       └── table.test.ts
│   ├── tsconfig.browser.json
│   ├── tsconfig.cjs.json
│   ├── tsconfig.es.json
│   ├── tsconfig.json
│   └── tsconfig.types.json
└── src-python/
    ├── .style.yapf
    ├── .yapfignore
    ├── README.md
    ├── a2i/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── a2i-response.json
    │   ├── a2irp.py
    │   └── a2irptest.py
    ├── bin/
    │   └── amazon-textract-pipeline
    ├── extras/
    │   └── dev.txt
    ├── setup.cfg
    ├── setup.py
    ├── tests/
    │   ├── data/
    │   │   ├── 180-degree-roation.json
    │   │   ├── 2023-Q2-table-model-sample.json
    │   │   ├── all_features_with_floating_title_header.json
    │   │   ├── analyzeExpenseResponse-multipage.json
    │   │   ├── bounding_box_issue.json
    │   │   ├── employment-application.json
    │   │   ├── gib.json
    │   │   ├── gib1.json
    │   │   ├── gib_10_degrees.json
    │   │   ├── gib__10_degrees.json
    │   │   ├── gib__15_degrees.json
    │   │   ├── gib__180_degrees.json
    │   │   ├── gib__25_degrees.json
    │   │   ├── gib__270_degrees.json
    │   │   ├── gib__90_degrees.json
    │   │   ├── gib__minus_10_degrees.json
    │   │   ├── gib_multi_page_table_merge.json
    │   │   ├── gib_multi_page_tables.json
    │   │   ├── gib_multi_tables_multi_page_sample.json
    │   │   ├── in-table-footer.json
    │   │   ├── in-table-title.json
    │   │   ├── issue_83.json
    │   │   ├── lending-doc-output.json
    │   │   ├── lending-package-no-signature.json
    │   │   ├── little_women_page_1.json
    │   │   ├── multi-page-forms-samples-2-page.json
    │   │   ├── multi-tables-multi-page-sample.json
    │   │   ├── patient_intake_form_sample.json
    │   │   ├── paystub_with_signature.json
    │   │   ├── queries_sample.json
    │   │   ├── request_for_verification_of_employment.json
    │   │   ├── table-performance-pretty.json
    │   │   ├── tables_with_headers_and_merged_cells.json
    │   │   ├── tables_with_headers_out_of_order_cells.json
    │   │   ├── tables_with_merged_cells_sample1.json
    │   │   ├── tables_with_merged_cells_sample2.json
    │   │   ├── test-trp2-analyzeid_sample_multi_page.json
    │   │   ├── test-trp2_analyzeid_sample1.json
    │   │   ├── test-trp2_analyzeid_sample1_with_OCR.json
    │   │   ├── test-trp2_analyzeid_sample2.json
    │   │   ├── test_table_merged_text.json
    │   │   ├── test_trp2_expense_sample1.json
    │   │   ├── test_trp2_expense_sample2.json
    │   │   ├── test_trp2_expense_sample3.json
    │   │   ├── test_trp2_expense_sample4.json
    │   │   └── textract-new-tables-api.json
    │   ├── test-response.json
    │   ├── test_base_trp2.py
    │   ├── test_merged.py
    │   ├── test_t_tables.py
    │   ├── test_trp.py
    │   ├── test_trp2.py
    │   ├── test_trp2_analyzeid.py
    │   ├── test_trp2_expense.py
    │   └── test_trp2_lending.py
    ├── textract-mapping/
    │   ├── README.md
    │   ├── __init__.py
    │   ├── loan-app-response.json
    │   ├── mapping-response.json
    │   ├── mapping.py
    │   └── mappingtest.py
    └── trp/
        ├── __init__.py
        ├── t_pipeline.py
        ├── t_tables.py
        ├── trp2.py
        ├── trp2_analyzeid.py
        ├── trp2_expense.py
        └── trp2_lending.py
Download .txt
SYMBOL INDEX (1173 symbols across 57 files)

FILE: src-csharp/Program.cs
  class Program (line 9) | class Program {
    method Main (line 12) | static void Main(string[] args) {
    method PrepareDocument (line 51) | static TextractDocument PrepareDocument(TextractTextAnalysisService te...
    method BuildTextractClient (line 59) | static TextractTextAnalysisService BuildTextractClient() {
  class TextractTextAnalysisService (line 70) | public class TextractTextAnalysisService {
    method TextractTextAnalysisService (line 72) | public TextractTextAnalysisService(IAmazonTextract textract) {
    method GetJobResults (line 75) | public GetDocumentAnalysisResponse GetJobResults(string jobId) {
    method IsJobComplete (line 83) | public bool IsJobComplete(string jobId) {
    method StartDocumentAnalysis (line 91) | public async Task<string> StartDocumentAnalysis(string bucketName, str...
    method WaitForJobCompletion (line 105) | public void WaitForJobCompletion(string jobId, int delay = 5000) {
    method Wait (line 111) | private void Wait(int delay = 5000) {

FILE: src-csharp/TextractExtensions.cs
  class Word (line 6) | public class Word {
    method Word (line 7) | public Word(Block block, List<Block> blocks) {
    method ToString (line 21) | public override string ToString() {
  class TextractDocument (line 26) | public class TextractDocument {
    method TextractDocument (line 30) | public TextractDocument(GetDocumentAnalysisResponse response) {
    method ParseDocumentPagesAndBlockMap (line 39) | private void ParseDocumentPagesAndBlockMap() {
    method Parse (line 63) | private void Parse() {
    method GetBlockById (line 70) | public Block GetBlockById(string blockId) {
  class Table (line 83) | public class Table {
    method Table (line 84) | public Table(Block block, List<Block> blocks) {
    method ToString (line 118) | public override string ToString() {
  class SelectionElement (line 128) | public class SelectionElement {
    method SelectionElement (line 129) | public SelectionElement(Block block, List<Block> blocks) {
  class Row (line 142) | public class Row {
    method Row (line 143) | public Row() {
    method ToString (line 148) | public override string ToString() {
  class Page (line 157) | public class Page {
    method Page (line 158) | public Page(List<Block> blocks, List<Block> blockMap) {
    method GetLinesInReadingOrder (line 192) | public List<IndexedText> GetLinesInReadingOrder() {
    method GetTextInReadingOrder (line 221) | public string GetTextInReadingOrder() {
    method ToString (line 240) | public override string ToString() {
    class Column (line 249) | public class Column {
      method ToString (line 253) | public override string ToString() {
    class IndexedText (line 258) | public class IndexedText {
      method ToString (line 262) | public override string ToString() {
  class NewGeometry (line 268) | public class NewGeometry : Geometry {
    method NewGeometry (line 270) | public NewGeometry(Geometry geometry) : base() {
    method ToString (line 284) | public override string ToString() {
  class NewBoundingBox (line 291) | public class NewBoundingBox : BoundingBox {
    method NewBoundingBox (line 292) | public NewBoundingBox(float width, float height, float left, float top...
    method ToString (line 299) | public override string ToString() {
  class Line (line 304) | public class Line {
    method Line (line 305) | public Line(Block block, List<Block> blocks) {
    method ToString (line 332) | public override string ToString() {
  class Form (line 343) | public class Form {
    method Form (line 347) | public Form() {
    method AddField (line 352) | public void AddField(Field field) {
    method GetFieldByKey (line 356) | public Field GetFieldByKey(string key) {
    method SearchFieldsByKey (line 360) | public List<Field> SearchFieldsByKey(string key) {
    method ToString (line 364) | public override string ToString() {
  class FieldValue (line 369) | public class FieldValue {
    method FieldValue (line 370) | public FieldValue(Block block, List<string> children, List<Block> bloc...
    method ToString (line 405) | public override string ToString() {
  class FieldKey (line 410) | public class FieldKey {
    method FieldKey (line 411) | public FieldKey(Block block, List<string> children, List<Block> blocks) {
    method ToString (line 444) | public override string ToString() {
  class Field (line 449) | public class Field {
    method Field (line 450) | public Field(Block block, List<Block> blocks) {
    method ToString (line 477) | public override string ToString() {
  class Cell (line 487) | public class Cell {
    method Cell (line 488) | public Cell(Block block, List<Block> blocks) {
    method ToString (line 532) | public override string ToString() {

FILE: src-js/bin/reading-order-diagnostic.js
  constant IN_FOLDER (line 19) | const IN_FOLDER = "test/data/corpus";
  constant OUT_FOLDER (line 20) | const OUT_FOLDER = "test/data/corpus-readingorder";

FILE: src-js/examples/browser-iife/main.js
  constant CHILD (line 16) | const CHILD = trp.ApiRelationshipType.Child;
  function parseDocResult (line 41) | function parseDocResult(textractJsonStr) {
  function onFileUpload (line 58) | function onFileUpload(fileInput) {

FILE: src-js/examples/browser-iife/test.js
  constant HTML_URI (line 19) | const HTML_URI = `file:///${__dirname}/main.html`;
  constant RESPONSE_JSON_PATH (line 20) | const RESPONSE_JSON_PATH = path.join(__dirname, "..", "..", "test", "dat...

FILE: src-js/examples/nodejs-require/main.js
  function testStaticFiles (line 26) | async function testStaticFiles() {
  function testCallTextract (line 46) | async function testCallTextract() {

FILE: src-js/examples/nodejs-typescript/src/main.ts
  function testStaticFiles (line 33) | async function testStaticFiles() {
  function testCallTextract (line 55) | async function testCallTextract() {

FILE: src-js/src/api-models/base.ts
  type ApiRelationshipType (line 14) | const enum ApiRelationshipType {
  type IRelationshipBase (line 52) | interface IRelationshipBase {
  type ApiAnswerRelationship (line 63) | interface ApiAnswerRelationship extends IRelationshipBase {
  type ApiChildRelationship (line 67) | interface ApiChildRelationship extends IRelationshipBase {
  type ApiComplexFeaturesRelationship (line 71) | interface ApiComplexFeaturesRelationship extends IRelationshipBase {
  type ApiMergedCellRelationship (line 75) | interface ApiMergedCellRelationship extends IRelationshipBase {
  type ApiTableFooterRelationship (line 79) | interface ApiTableFooterRelationship extends IRelationshipBase {
  type ApiTableTitleRelationship (line 83) | interface ApiTableTitleRelationship extends IRelationshipBase {
  type ApiValueRelationship (line 87) | interface ApiValueRelationship extends IRelationshipBase {
  type ApiRelationship (line 96) | type ApiRelationship =
  type ApiBlockType (line 110) | const enum ApiBlockType {
  type ApiLayoutBlockType (line 272) | type ApiLayoutBlockType =
  constant LAYOUT_BLOCK_TYPES (line 289) | const LAYOUT_BLOCK_TYPES = new Set([
  function isLayoutBlockType (line 305) | function isLayoutBlockType(blockType: ApiBlockType): boolean {
  type ApiBlockBase (line 314) | interface ApiBlockBase {

FILE: src-js/src/api-models/content.ts
  type ApiTextType (line 18) | const enum ApiTextType {
  type ApiWordBlock (line 28) | interface ApiWordBlock extends ApiBlockBase {
  type ApiLineBlock (line 48) | interface ApiLineBlock extends ApiBlockBase {
  type ApiSelectionStatus (line 70) | const enum ApiSelectionStatus {
  type ApiSelectionElementBlock (line 80) | interface ApiSelectionElementBlock extends ApiBlockBase {
  type ApiSignatureBlock (line 100) | interface ApiSignatureBlock extends ApiBlockBase {

FILE: src-js/src/api-models/document.ts
  type ApiPageBlock (line 127) | interface ApiPageBlock extends ApiBlockBase {
  type ApiBlock (line 143) | type ApiBlock =

FILE: src-js/src/api-models/expense.ts
  type ApiExpenseComponentDetection (line 11) | interface ApiExpenseComponentDetection {
  type ApiExpenseFieldType (line 20) | interface ApiExpenseFieldType {
  type ApiExpenseField (line 25) | interface ApiExpenseField {
  type ApiExpenseLineItem (line 32) | interface ApiExpenseLineItem {
  type ApiExpenseLineItemGroup (line 36) | interface ApiExpenseLineItemGroup {
  type ApiExpenseDocument (line 41) | interface ApiExpenseDocument {

FILE: src-js/src/api-models/form.ts
  type ApiKeyValueEntityType (line 14) | const enum ApiKeyValueEntityType {
  type ApiKeyBlock (line 31) | interface ApiKeyBlock extends ApiBlockBase {
  type ApiKeyValueSetBlock (line 55) | interface ApiKeyValueSetBlock extends ApiBlockBase {
  type ApiValueBlock (line 82) | interface ApiValueBlock extends ApiBlockBase {

FILE: src-js/src/api-models/geometry.ts
  type ApiBoundingBox (line 10) | interface ApiBoundingBox {
  type ApiPoint (line 34) | interface ApiPoint {
  type ApiGeometry (line 50) | interface ApiGeometry {

FILE: src-js/src/api-models/id.ts
  type ApiIdentityDocumentFieldType (line 7) | interface ApiIdentityDocumentFieldType {
  type ApiIdentityDocumentFieldValueDetection (line 22) | interface ApiIdentityDocumentFieldValueDetection {
  type ApiIdentityDocumentField (line 39) | interface ApiIdentityDocumentField {
  type ApiIdentityDocument (line 44) | interface ApiIdentityDocument {

FILE: src-js/src/api-models/layout.ts
  type ApiLayoutBlockBase (line 12) | interface ApiLayoutBlockBase extends ApiBlockBase {
  type ApiLayoutFigureBlock (line 34) | interface ApiLayoutFigureBlock extends ApiLayoutBlockBase {
  type ApiLayoutFooterBlock (line 45) | interface ApiLayoutFooterBlock extends ApiLayoutBlockBase {
  type ApiLayoutHeaderBlock (line 56) | interface ApiLayoutHeaderBlock extends ApiLayoutBlockBase {
  type ApiLayoutKeyValueBlock (line 63) | interface ApiLayoutKeyValueBlock extends ApiLayoutBlockBase {
  type ApiLayoutListBlock (line 70) | interface ApiLayoutListBlock extends ApiLayoutBlockBase {
  type ApiLayoutPageNumberBlock (line 79) | interface ApiLayoutPageNumberBlock extends ApiLayoutBlockBase {
  type ApiLayoutSectionHeaderBlock (line 86) | interface ApiLayoutSectionHeaderBlock extends ApiLayoutBlockBase {
  type ApiLayoutTableBlock (line 93) | interface ApiLayoutTableBlock extends ApiLayoutBlockBase {
  type ApiLayoutTextBlock (line 100) | interface ApiLayoutTextBlock extends ApiLayoutBlockBase {
  type ApiLayoutTitleBlock (line 107) | interface ApiLayoutTitleBlock extends ApiLayoutBlockBase {
  type ApiLayoutBlock (line 116) | type ApiLayoutBlock =

FILE: src-js/src/api-models/query.ts
  type ApiQueryBlock (line 15) | interface ApiQueryBlock extends ApiBlockBase {
  type ApiQueryResultBlock (line 52) | interface ApiQueryResultBlock extends ApiBlockBase {

FILE: src-js/src/api-models/response.ts
  type ApiJobStatus (line 15) | const enum ApiJobStatus {
  type ApiDocumentMetadata (line 27) | interface ApiDocumentMetadata {
  type ApiAnalyzeExpenseResponse (line 38) | interface ApiAnalyzeExpenseResponse {
  type ApiAnalyzeIdResponse (line 48) | interface ApiAnalyzeIdResponse {
  type ApiResponseWithContent (line 54) | interface ApiResponseWithContent {
  type ApiAnalyzeDocumentResponse (line 64) | interface ApiAnalyzeDocumentResponse extends ApiResponseWithContent {
  type ApiDetectDocumentTextResponse (line 78) | interface ApiDetectDocumentTextResponse extends ApiResponseWithContent {
  type ApiAsyncJobOutputInProgress (line 82) | interface ApiAsyncJobOutputInProgress {
  type ApiAsyncJobOuputInProgress (line 96) | interface ApiAsyncJobOuputInProgress extends ApiAsyncJobOutputInProgress {}
  type ApiAsyncJobOutputStatus (line 101) | interface ApiAsyncJobOutputStatus {
  type ApiResultWarning (line 114) | interface ApiResultWarning {
  type ApiAsyncJobOuputSucceded (line 119) | interface ApiAsyncJobOuputSucceded extends ApiResponseWithContent, ApiAs...
  type ApiAsyncJobOutputPartialSuccess (line 123) | interface ApiAsyncJobOutputPartialSuccess extends ApiResponseWithContent...
  type ApiAsyncJobOutputFailed (line 127) | interface ApiAsyncJobOutputFailed extends ApiAsyncJobOutputStatus {
  type ApiAsyncDocumentAnalysis (line 131) | type ApiAsyncDocumentAnalysis =
  type ApiAsyncDocumentTextDetection (line 139) | type ApiAsyncDocumentTextDetection =
  type ApiResponsePage (line 147) | type ApiResponsePage =
  type ApiResponsePages (line 153) | type ApiResponsePages =

FILE: src-js/src/api-models/table.ts
  type ApiTableEntityType (line 24) | const enum ApiTableEntityType {
  type ApiTableBlock (line 32) | interface ApiTableBlock extends ApiBlockBase {
  type ApiTableCellEntityType (line 57) | const enum ApiTableCellEntityType {
  type ApiCellBlock (line 71) | interface ApiCellBlock extends ApiBlockBase {
  type ApiMergedCellBlock (line 107) | interface ApiMergedCellBlock extends ApiBlockBase {
  type ApiTableFooterBlock (line 143) | interface ApiTableFooterBlock extends ApiBlockBase {
  type ApiTableTitleBlock (line 159) | interface ApiTableTitleBlock extends ApiBlockBase {

FILE: src-js/src/base.ts
  type Constructor (line 17) | type Constructor<T> = new (...args: any[]) => T;
  class ApiObjectWrapper (line 24) | class ApiObjectWrapper<T> {
    method constructor (line 27) | constructor(dict: T) {
    method dict (line 34) | get dict(): T {
  type IApiBlockWrapper (line 42) | interface IApiBlockWrapper<T extends ApiBlock> {
  type IWithText (line 66) | interface IWithText {
  type IRenderOpts (line 81) | interface IRenderOpts {
  function doesFilterAllowBlockType (line 105) | function doesFilterAllowBlockType(
  type IRenderable (line 121) | interface IRenderable extends IWithText {
  class ApiBlockWrapper (line 143) | class ApiBlockWrapper<T extends ApiBlock> extends ApiObjectWrapper<T> im...
    method id (line 144) | get id(): string {
    method blockType (line 148) | get blockType(): ApiBlockType {
    method childBlockIds (line 152) | get childBlockIds(): string[] {
    method relatedBlockIdsByRelType (line 156) | relatedBlockIdsByRelType(relType: ApiRelationshipType | ApiRelationshi...
  class DocumentMetadata (line 180) | class DocumentMetadata extends ApiObjectWrapper<ApiDocumentMetadata> {
    method nPages (line 184) | get nPages(): number {
  type INestedListOpts (line 192) | interface INestedListOpts {
  function getIterable (line 211) | function getIterable<T>(collectionFetcher: () => T[]): Iterable<T> {
  type IEscapeHtmlOpts (line 237) | interface IEscapeHtmlOpts {
  function escapeHtml (line 252) | function escapeHtml(str: string, { forAttr = false }: IEscapeHtmlOpts = ...
  type IIndentOpts (line 269) | interface IIndentOpts {
  function indent (line 301) | function indent(
  type AggregationMethod (line 319) | const enum AggregationMethod {
  function modalAvg (line 332) | function modalAvg(arr: Iterable<number>): number | null {
  function aggregate (line 358) | function aggregate(arr: Iterable<number>, aggMethod: AggregationMethod):...
  function argMax (line 387) | function argMax(arr: number[]): { maxValue: number; maxIndex: number } {
  type ActionOnMissingBlock (line 401) | type ActionOnMissingBlock = "error" | "warn" | null;
  type ActionOnUnexpectedBlockType (line 410) | type ActionOnUnexpectedBlockType = "error" | "warn" | null;
  type IBlockTypeFilterOpts (line 415) | interface IBlockTypeFilterOpts {
  type IMissingBlockOpts (line 437) | interface IMissingBlockOpts {
  function normalizeOptionalSet (line 452) | function normalizeOptionalSet<T>(raw: T | T[] | Set<T> | null | undefine...
  function setIntersection (line 464) | function setIntersection<T>(a: Set<T>, b: Set<T>) {
  function setUnion (line 477) | function setUnion<T>(a: Set<T>, b: Set<T>) {
  type IDocBlocks (line 494) | interface IDocBlocks {
  type IBlockManager (line 512) | interface IBlockManager extends IDocBlocks {
  type IWithParentPage (line 544) | interface IWithParentPage<TPage extends IBlockManager> {
  type IWithRelatedItems (line 556) | interface IWithRelatedItems<TRelated extends IApiBlockWrapper<ApiBlock>> {
  function _implIterRelatedBlocksByRelType (line 599) | function _implIterRelatedBlocksByRelType(
  function _implIterRelatedItemsByRelType (line 675) | function _implIterRelatedItemsByRelType(
  function _implListRelatedBlocksByRelType (line 710) | function _implListRelatedBlocksByRelType(
  function _implListRelatedItemsByRelType (line 775) | function _implListRelatedItemsByRelType(
  type IHostedApiBlockWrapper (line 791) | interface IHostedApiBlockWrapper<TBlock extends ApiBlock, TPage extends ...
  class PageHostedApiBlockWrapper (line 801) | class PageHostedApiBlockWrapper<TBlock extends ApiBlock, TPage extends I...
    method constructor (line 807) | constructor(dict: TBlock, parentPage: TPage) {
    method parentPage (line 813) | get parentPage(): TPage {
    method iterRelatedItemsByRelType (line 817) | iterRelatedItemsByRelType(
    method listRelatedItemsByRelType (line 824) | listRelatedItemsByRelType(

FILE: src-js/src/content.ts
  class Word (line 42) | class Word
    method constructor (line 48) | constructor(block: ApiWordBlock) {
    method confidence (line 56) | get confidence(): number {
    method confidence (line 59) | set confidence(newVal: number) {
    method geometry (line 65) | get geometry(): Geometry<ApiWordBlock, Word> {
    method text (line 71) | get text(): string {
    method textType (line 77) | get textType(): ApiTextType {
    method textType (line 80) | set textType(newVal: ApiTextType) {
    method html (line 87) | html(opts?: IRenderOpts): string {
    method str (line 95) | str(): string {
  type IWithContent (line 110) | interface IWithContent<TContent extends IApiBlockWrapper<ApiBlock> & IRe...
  type IWithContentMixinOptions (line 148) | interface IWithContentMixinOptions {
  function buildWithContent (line 189) | function buildWithContent<TContent extends IApiBlockWrapper<ApiBlock> & ...
  type IWithWordsMixinOptions (line 290) | interface IWithWordsMixinOptions {
  type IWithWords (line 313) | interface IWithWords extends IWithText {
  function WithWords (line 367) | function WithWords<
  class LineGeneric (line 469) | class LineGeneric<TPage extends IBlockManager>
    method constructor (line 475) | constructor(block: ApiLineBlock, parentPage: TPage) {
    method confidence (line 480) | get confidence(): number {
    method confidence (line 483) | set confidence(newVal: number) {
    method geometry (line 489) | get geometry(): Geometry<ApiLineBlock, LineGeneric<TPage>> {
    method text (line 497) | override get text(): string {
    method getText (line 509) | override getText(opts?: IBlockTypeFilterOpts): string {
    method html (line 520) | html(opts?: IRenderOpts): string {
    method str (line 525) | str(): string {
  class SelectionElement (line 537) | class SelectionElement
    method constructor (line 543) | constructor(block: ApiSelectionElementBlock) {
    method confidence (line 551) | get confidence(): number {
    method confidence (line 554) | set confidence(newVal: number) {
    method geometry (line 560) | get geometry(): Geometry<ApiSelectionElementBlock, SelectionElement> {
    method isSelected (line 566) | get isSelected(): boolean {
    method selectionStatus (line 574) | get selectionStatus(): ApiSelectionStatus {
    method selectionStatus (line 577) | set selectionStatus(newVal: ApiSelectionStatus) {
    method html (line 586) | html(opts?: IRenderOpts): string {
    method str (line 596) | str(): string {
    method text (line 603) | get text(): string {
  class Signature (line 613) | class Signature
    method constructor (line 619) | constructor(block: ApiSignatureBlock) {
    method confidence (line 627) | get confidence(): number {
    method confidence (line 630) | set confidence(newVal: number) {
    method geometry (line 636) | get geometry(): Geometry<ApiSignatureBlock, Signature> {
    method html (line 645) | html(opts?: IRenderOpts): string {
    method str (line 659) | str(): string {
    method text (line 666) | get text(): "" {

FILE: src-js/src/document.ts
  type ReadingOrderLayoutMode (line 93) | const enum ReadingOrderLayoutMode {
  type HeuristicReadingOrderModelParams (line 111) | interface HeuristicReadingOrderModelParams {
  type HeaderFooterSegmentModelParams (line 156) | interface HeaderFooterSegmentModelParams {
  class Page (line 183) | class Page
    method constructor (line 223) | constructor(pageBlock: ApiPageBlock, blocks: ApiBlock[], parentDocumen...
    method getBlockById (line 273) | getBlockById(blockId: string): ApiBlock | undefined {
    method getItemByBlockId (line 277) | getItemByBlockId(
    method getModalWordOrientationDegrees (line 316) | getModalWordOrientationDegrees(): number | null {
    method _getLineClustersByColumn (line 333) | _getLineClustersByColumn({
    method getLineClustersInReadingOrder (line 536) | getLineClustersInReadingOrder({
    method getTextInReadingOrder (line 584) | getTextInReadingOrder({
    method _groupLinesByVerticalGaps (line 622) | _groupLinesByVerticalGaps(
    method _getHeaderOrFooterLines (line 771) | _getHeaderOrFooterLines(
    method getFooterLines (line 849) | getFooterLines(
    method getHeaderLines (line 869) | getHeaderLines(
    method getLinesByLayoutArea (line 891) | getLinesByLayoutArea(
    method iterLines (line 952) | iterLines(): Iterable<LineGeneric<Page>> {
    method iterRelatedItemsByRelType (line 956) | iterRelatedItemsByRelType(
    method iterSignatures (line 973) | iterSignatures(): Iterable<Signature> {
    method iterTables (line 989) | iterTables(): Iterable<TableGeneric<Page>> {
    method lineAtIndex (line 1002) | lineAtIndex(ix: number): LineGeneric<Page> {
    method listBlocks (line 1014) | listBlocks(): ApiBlock[] {
    method listLines (line 1025) | listLines(): LineGeneric<Page>[] {
    method listRelatedItemsByRelType (line 1029) | listRelatedItemsByRelType(
    method listSignatures (line 1043) | listSignatures(): Signature[] {
    method listTables (line 1056) | listTables(): TableGeneric<Page>[] {
    method registerParsedItem (line 1060) | registerParsedItem(
    method tableAtIndex (line 1086) | tableAtIndex(ix: number): TableGeneric<Page> {
    method form (line 1098) | get form(): FormGeneric<Page> {
    method geometry (line 1107) | get geometry(): Geometry<ApiPageBlock, Page> {
    method hasLayout (line 1115) | get hasLayout(): boolean {
    method layout (line 1123) | get layout(): LayoutGeneric<Page> {
    method nLines (line 1129) | get nLines(): number {
    method nSignatures (line 1135) | get nSignatures(): number {
    method nTables (line 1141) | get nTables(): number {
    method pageNumber (line 1148) | get pageNumber(): number {
    method parentDocument (line 1159) | get parentDocument(): TextractDocument {
    method queries (line 1168) | get queries(): QueryInstanceCollectionGeneric<Page> {
    method text (line 1176) | get text(): string {
    method html (line 1198) | html(opts?: IRenderOpts): string {
    method str (line 1217) | str(): string {
  class Line (line 1231) | class Line extends LineGeneric<Page> {}
  class Field (line 1241) | class Field extends FieldGeneric<Page> {}
  class FieldKey (line 1250) | class FieldKey extends FieldKeyGeneric<Page> {}
  class FieldValue (line 1259) | class FieldValue extends FieldValueGeneric<Page> {}
  class Form (line 1267) | class Form extends FormGeneric<Page> {}
  class LayoutFigure (line 1278) | class LayoutFigure extends LayoutFigureGeneric<Page> {}
  class LayoutFooter (line 1288) | class LayoutFooter extends LayoutFooterGeneric<Page> {}
  class LayoutHeader (line 1298) | class LayoutHeader extends LayoutHeaderGeneric<Page> {}
  class LayoutKeyValue (line 1308) | class LayoutKeyValue extends LayoutKeyValueGeneric<Page> {}
  class LayoutPageNumber (line 1317) | class LayoutPageNumber extends LayoutPageNumberGeneric<Page> {}
  class LayoutSectionHeader (line 1326) | class LayoutSectionHeader extends LayoutSectionHeaderGeneric<Page> {}
  class LayoutTable (line 1336) | class LayoutTable extends LayoutTableGeneric<Page> {}
  class LayoutText (line 1345) | class LayoutText extends LayoutTextGeneric<Page> {}
  class LayoutTitle (line 1354) | class LayoutTitle extends LayoutTitleGeneric<Page> {}
  class LayoutList (line 1363) | class LayoutList extends LayoutListGeneric<Page> {}
  class Layout (line 1373) | class Layout extends LayoutGeneric<Page> {}
  class QueryInstance (line 1384) | class QueryInstance extends QueryInstanceGeneric<Page> {}
  class QueryInstanceCollection (line 1392) | class QueryInstanceCollection extends QueryInstanceCollectionGeneric<Pag...
  class QueryResult (line 1401) | class QueryResult extends QueryResultGeneric<Page> {}
  class Cell (line 1412) | class Cell extends CellGeneric<Page> {}
  class MergedCell (line 1421) | class MergedCell extends MergedCellGeneric<Page> {}
  class Row (line 1431) | class Row extends RowGeneric<Page> {}
  class Table (line 1440) | class Table extends TableGeneric<Page> {}
  class TableFooter (line 1449) | class TableFooter extends TableFooterGeneric<Page> {}
  class TableTitle (line 1458) | class TableTitle extends TableTitleGeneric<Page> {}
  class TextractDocument (line 1463) | class TextractDocument
    method constructor (line 1476) | constructor(textractResults: ApiResponsePage | ApiResponsePages) {
    method _parse (line 1498) | _parse(): void {
    method _consolidateMultipleResponses (line 1531) | static _consolidateMultipleResponses(
    method form (line 1641) | get form(): FormsComposite {
    method nPages (line 1648) | get nPages(): number {
    method text (line 1657) | get text(): string {
    method getBlockById (line 1661) | getBlockById(blockId: string): ApiBlock | undefined {
    method getItemByBlockId (line 1674) | getItemByBlockId(
    method iterPages (line 1697) | iterPages(): Iterable<Page> {
    method listBlocks (line 1706) | listBlocks(): ApiBlock[] {
    method listPages (line 1715) | listPages(): Page[] {
    method pageNumber (line 1724) | pageNumber(pageNum: number): Page {
    method html (line 1749) | html(opts?: IRenderOpts): string {
    method str (line 1759) | str(): string {
  class FormsComposite (line 1764) | class FormsComposite extends FormsCompositeGeneric<Page, TextractDocumen...

FILE: src-js/src/expense.ts
  class ExpenseComponentDetection (line 18) | class ExpenseComponentDetection extends ApiObjectWrapper<ApiExpenseCompo...
    method constructor (line 22) | constructor(dict: ApiExpenseComponentDetection, parentField: ExpenseFi...
    method confidence (line 28) | get confidence(): number {
    method confidence (line 31) | set confidence(newVal: number) {
    method geometry (line 37) | get geometry(): undefined | Geometry<ApiExpenseComponentDetection, Exp...
    method parentField (line 40) | get parentField(): ExpenseField {
    method text (line 43) | get text(): string {
    method text (line 46) | set text(newVal: string) {
  class ExpenseFieldType (line 51) | class ExpenseFieldType extends ApiObjectWrapper<ApiExpenseFieldType> {
    method constructor (line 54) | constructor(dict: ApiExpenseFieldType, parentField: ExpenseField) {
    method confidence (line 59) | get confidence(): number {
    method confidence (line 62) | set confidence(newVal: number) {
    method parentField (line 65) | get parentField(): ExpenseField {
    method text (line 68) | get text(): string {
    method text (line 71) | set text(newVal: string) {
  class ExpenseField (line 76) | class ExpenseField extends ApiObjectWrapper<ApiExpenseField> {
    method constructor (line 82) | constructor(dict: ApiExpenseField, parent: ExpenseDocument | ExpenseLi...
    method fieldType (line 90) | get fieldType(): ExpenseFieldType {
    method label (line 93) | get label(): ExpenseComponentDetection | null {
    method pageNumber (line 96) | get pageNumber(): number {
    method parent (line 99) | get parent(): ExpenseDocument | ExpenseLineItem {
    method value (line 102) | get value(): ExpenseComponentDetection {
  class ExpenseLineItem (line 107) | class ExpenseLineItem extends ApiObjectWrapper<ApiExpenseLineItem> {
    method constructor (line 111) | constructor(dict: ApiExpenseLineItem, parentGroup: ExpenseLineItemGrou...
    method nFields (line 117) | get nFields(): number {
    method parentGroup (line 120) | get parentGroup(): ExpenseLineItemGroup {
    method iterFields (line 135) | iterFields(): Iterable<ExpenseField> {
    method listFields (line 139) | listFields(): ExpenseField[] {
    method getFieldByType (line 143) | getFieldByType(fieldType: string): ExpenseField | null {
    method searchFieldsByType (line 148) | searchFieldsByType(fieldType: string): ExpenseField[] {
  class ExpenseLineItemGroup (line 153) | class ExpenseLineItemGroup extends ApiObjectWrapper<ApiExpenseLineItemGr...
    method constructor (line 157) | constructor(dict: ApiExpenseLineItemGroup, parentDoc: ExpenseDocument) {
    method index (line 165) | get index(): number {
    method nLineItems (line 168) | get nLineItems(): number {
    method parentDoc (line 171) | get parentDoc(): ExpenseDocument {
    method iterLineItems (line 186) | iterLineItems(): Iterable<ExpenseLineItem> {
    method listLineItems (line 190) | listLineItems(): ExpenseLineItem[] {
  class ExpenseDocument (line 200) | class ExpenseDocument extends ApiObjectWrapper<ApiExpenseDocument> {
    method constructor (line 205) | constructor(dict: ApiExpenseDocument, parentExpense: TextractExpense |...
    method index (line 215) | get index(): number {
    method nLineItemGroups (line 218) | get nLineItemGroups(): number {
    method nSummaryFields (line 221) | get nSummaryFields(): number {
    method parentExpense (line 224) | get parentExpense(): TextractExpense | null {
    method getSummaryFieldByType (line 228) | getSummaryFieldByType(fieldType: string): ExpenseField | null {
    method searchSummaryFieldsByType (line 233) | searchSummaryFieldsByType(fieldType: string): ExpenseField[] {
    method iterLineItemGroups (line 248) | iterLineItemGroups(): Iterable<ExpenseLineItemGroup> {
    method iterSummaryFields (line 263) | iterSummaryFields(): Iterable<ExpenseField> {
    method listLineItemGroups (line 267) | listLineItemGroups(): ExpenseLineItemGroup[] {
    method listSummaryFields (line 271) | listSummaryFields(): ExpenseField[] {
  class TextractExpense (line 282) | class TextractExpense extends ApiObjectWrapper<ApiAnalyzeExpenseResponse> {
    method constructor (line 290) | constructor(textractResult: ApiAnalyzeExpenseResponse) {
    method metadata (line 303) | get metadata(): DocumentMetadata {
    method nDocs (line 306) | get nDocs(): number {
    method iterDocs (line 321) | iterDocs(): Iterable<ExpenseDocument> {
    method listDocs (line 325) | listDocs(): ExpenseDocument[] {

FILE: src-js/src/form.ts
  class FieldKeyGeneric (line 32) | class FieldKeyGeneric<TPage extends IBlockManager>
    method constructor (line 39) | constructor(block: ApiKeyBlock | ApiKeyValueSetBlock, parentField: Fie...
    method geometry (line 45) | get geometry(): Geometry<ApiKeyBlock | ApiKeyValueSetBlock, FieldKeyGe...
    method parentField (line 48) | get parentField(): FieldGeneric<TPage> {
    method confidence (line 58) | get confidence(): number {
    method getOcrConfidence (line 71) | getOcrConfidence(aggMethod: AggregationMethod = AggregationMethod.Mean...
    method html (line 80) | html({ includeBlockTypes = null, skipBlockTypes = null }: IRenderOpts ...
    method str (line 92) | str(): string {
  class FieldValueGeneric (line 102) | class FieldValueGeneric<TPage extends IBlockManager>
    method constructor (line 115) | constructor(valueBlock: ApiKeyValueSetBlock | ApiValueBlock, parentFie...
    method confidence (line 127) | get confidence(): number {
    method geometry (line 130) | get geometry(): Geometry<ApiKeyValueSetBlock | ApiValueBlock, FieldVal...
    method isCheckbox (line 139) | get isCheckbox(): boolean {
    method isSelected (line 148) | get isSelected(): boolean | null {
    method parentField (line 154) | get parentField(): FieldGeneric<TPage> {
    method selectionStatus (line 163) | get selectionStatus(): ApiSelectionStatus | null {
    method getOcrConfidence (line 179) | getOcrConfidence(aggMethod: AggregationMethod = AggregationMethod.Mean...
    method html (line 188) | html({ includeBlockTypes = null, skipBlockTypes = null }: IRenderOpts ...
    method str (line 200) | str(): string {
  class FieldGeneric (line 213) | class FieldGeneric<TPage extends IBlockManager>
    method constructor (line 220) | constructor(keyBlock: ApiKeyBlock | ApiKeyValueSetBlock, parentForm: F...
    method blockType (line 252) | get blockType(): ApiBlockType {
    method confidence (line 264) | get confidence(): number {
    method childBlockIds (line 278) | get childBlockIds(): string[] {
    method dict (line 282) | get dict(): ApiKeyBlock | ApiKeyValueSetBlock {
    method id (line 286) | get id(): string {
    method isSelected (line 296) | get isSelected(): boolean | null {
    method isCheckbox (line 305) | get isCheckbox(): boolean {
    method key (line 308) | get key(): FieldKeyGeneric<TPage> {
    method parentForm (line 311) | get parentForm(): FormGeneric<TPage> {
    method parentPage (line 314) | get parentPage(): TPage {
    method selectionStatus (line 323) | get selectionStatus(): ApiSelectionStatus | null {
    method text (line 326) | get text(): string {
    method value (line 329) | get value(): FieldValueGeneric<TPage> | null {
    method getOcrConfidence (line 343) | getOcrConfidence(aggMethod: AggregationMethod = AggregationMethod.Mean...
    method relatedBlockIdsByRelType (line 351) | relatedBlockIdsByRelType(relType: ApiRelationshipType | ApiRelationshi...
    method html (line 361) | html(opts?: IRenderOpts): string {
    method str (line 379) | str(): string {
  class FormGeneric (line 392) | class FormGeneric<TPage extends IBlockManager> implements IRenderable {
    method constructor (line 397) | constructor(keyBlocks: Array<ApiKeyBlock | ApiKeyValueSetBlock>, paren...
    method nFields (line 418) | get nFields(): number {
    method parentPage (line 421) | get parentPage(): TPage {
    method text (line 424) | get text(): string {
    method getFieldByKey (line 430) | getFieldByKey(key: string): FieldGeneric<TPage> | null {
    method html (line 439) | html(opts: IRenderOpts = {}): string {
    method iterFields (line 456) | iterFields(skipFieldsWithoutKey = false): Iterable<FieldGeneric<TPage>> {
    method listFields (line 464) | listFields(skipFieldsWithoutKey = false): FieldGeneric<TPage>[] {
    method searchFieldsByKey (line 472) | searchFieldsByKey(key: string): FieldGeneric<TPage>[] {
    method str (line 477) | str(): string {
  class FormsCompositeGeneric (line 491) | class FormsCompositeGeneric<TPage extends IBlockManager, TDocument exten...
    method constructor (line 497) | constructor(forms: FormGeneric<TPage>[], parentDocument: TDocument) {
    method nFields (line 502) | get nFields(): number {
    method parentDocument (line 505) | get parentDocument(): TDocument {
    method text (line 508) | get text(): string {
    method getFieldByKey (line 512) | getFieldByKey(key: string): FieldGeneric<TPage> | null {
    method html (line 525) | html(): string {
    method iterFields (line 548) | iterFields(skipFieldsWithoutKey = false): Iterable<FieldGeneric<TPage>> {
    method listFields (line 556) | listFields(skipFieldsWithoutKey = false): FieldGeneric<TPage>[] {
    method searchFieldsByKey (line 569) | searchFieldsByKey(key: string): FieldGeneric<TPage>[] {
    method str (line 573) | str(): string {
  type IWithForm (line 581) | interface IWithForm<TPage extends IBlockManager> {
  type IWithFormsComposite (line 591) | interface IWithFormsComposite<TPage extends IBlockManager, TDocument ext...

FILE: src-js/src/geometry.ts
  class BoundingBox (line 11) | class BoundingBox<
    method constructor (line 17) | constructor(dict: ApiBoundingBox, parentGeometry: Geometry<TParentBloc...
    method bottom (line 25) | get bottom(): number {
    method hCenter (line 31) | get hCenter(): number {
    method height (line 37) | get height(): number {
    method left (line 43) | get left(): number {
    method parentGeometry (line 49) | get parentGeometry(): Geometry<TParentBlock, TParent> | null {
    method top (line 55) | get top(): number {
    method right (line 61) | get right(): number {
    method vCenter (line 67) | get vCenter(): number {
    method width (line 73) | get width(): number {
    method union (line 81) | union<T>(other: BoundingBox<T, ApiObjectWrapper<T>>): BoundingBox<T, A...
    method intersection (line 101) | intersection<T>(other: BoundingBox<T, ApiObjectWrapper<T>>): BoundingB...
    method str (line 126) | str(): string {
  class Point (line 134) | class Point<
    method constructor (line 140) | constructor(dict: ApiPoint, parentGeometry: Geometry<TParentBlock, TPa...
    method parentGeometry (line 148) | get parentGeometry(): Geometry<TParentBlock, TParent> | null {
    method x (line 154) | get x(): number {
    method y (line 160) | get y(): number {
    method str (line 167) | str(): string {
  class Geometry (line 175) | class Geometry<
    method constructor (line 183) | constructor(dict: ApiGeometry, parentObject: TParent | null) {
    method boundingBox (line 193) | get boundingBox(): BoundingBox<TParentBlock, TParent> {
    method parentObject (line 199) | get parentObject(): TParent | null {
    method polygon (line 208) | get polygon(): Point<TParentBlock, TParent>[] {
    method orientationRadians (line 218) | orientationRadians(): number | null {
    method orientationDegrees (line 229) | orientationDegrees(): number | null {
    method str (line 238) | str(): string {
  type IWithGeometry (line 246) | interface IWithGeometry<TParentBlock, TParent extends ApiObjectWrapper<T...

FILE: src-js/src/id.ts
  type IdFieldType (line 17) | enum IdFieldType {
  type IdFieldValueType (line 49) | const enum IdFieldValueType {
  type IdDocumentType (line 57) | enum IdDocumentType {
  class IdDocumentField (line 63) | class IdDocumentField extends ApiObjectWrapper<ApiIdentityDocumentField> {
    method constructor (line 66) | constructor(dict: ApiIdentityDocumentField, parentDocument: IdDocument...
    method isValueNormalized (line 71) | get isValueNormalized(): boolean {
    method fieldTypeRaw (line 77) | get fieldTypeRaw(): string | undefined {
    method fieldType (line 83) | get fieldType(): IdFieldType {
    method parentDocument (line 96) | get parentDocument(): IdDocument | undefined {
    method value (line 102) | get value(): string {
    method valueConfidence (line 106) | get valueConfidence(): number {
    method valueRaw (line 112) | get valueRaw(): string | undefined {
    method valueType (line 118) | get valueType(): IdFieldValueType {
    method str (line 131) | str(): string {
  class IdDocument (line 141) | class IdDocument extends ApiObjectWrapper<ApiIdentityDocument> {
    method constructor (line 146) | constructor(dict: ApiIdentityDocument, parentResult: TextractIdentity ...
    method index (line 157) | get index(): number {
    method idType (line 164) | get idType(): IdDocumentType {
    method nFields (line 172) | get nFields(): number {
    method parentCollection (line 178) | get parentCollection(): TextractIdentity | undefined {
    method getFieldByType (line 182) | getFieldByType(fieldType: IdFieldType): IdDocumentField | undefined {
    method iterFields (line 196) | iterFields(): Iterable<IdDocumentField> {
    method listFields (line 203) | listFields(): IdDocumentField[] {
    method str (line 210) | str(): string {
  class TextractIdentity (line 222) | class TextractIdentity extends ApiObjectWrapper<ApiAnalyzeIdResponse> {
    method constructor (line 229) | constructor(dict: ApiAnalyzeIdResponse) {
    method modelVersion (line 234) | get modelVersion(): string {
    method nDocuments (line 241) | get nDocuments(): number {
    method nPages (line 245) | get nPages(): number {
    method getDocAtIndex (line 253) | getDocAtIndex(index: number): IdDocument {
    method iterDocuments (line 270) | iterDocuments(): Iterable<IdDocument> {
    method listDocuments (line 277) | listDocuments(): IdDocument[] {
    method str (line 284) | str(): string {

FILE: src-js/src/layout.ts
  type ILayoutItem (line 50) | interface ILayoutItem<
  class LayoutItemBaseGeneric (line 150) | class LayoutItemBaseGeneric<
    method constructor (line 164) | constructor(block: TBlock, parentLayout: LayoutGeneric<TPage>) {
    method confidence (line 170) | get confidence(): number {
    method geometry (line 173) | get geometry(): Geometry<ApiLayoutBlock, LayoutItemBaseGeneric<TBlock,...
    method nLayoutChildrenDirect (line 176) | get nLayoutChildrenDirect(): number {
    method nLayoutChildrenTotal (line 179) | get nLayoutChildrenTotal(): number {
    method parentLayout (line 182) | get parentLayout(): LayoutGeneric<TPage> {
    method iterLayoutChildren (line 185) | iterLayoutChildren(
    method listLayoutChildren (line 197) | listLayoutChildren({
  class LayoutLineContainerItem (line 257) | class LayoutLineContainerItem<
    method getText (line 267) | override getText(opts?: IBlockTypeFilterOpts) {
    method iterTextLines (line 289) | iterTextLines(): Iterable<LineGeneric<TPage>> {
    method listTextLines (line 299) | listTextLines(): Array<LineGeneric<TPage>> {
    method nTextLines (line 310) | get nTextLines(): number {
  class LayoutFigureGeneric (line 320) | class LayoutFigureGeneric<
    method html (line 341) | html(opts?: IBlockTypeFilterOpts): string {
    method str (line 351) | str(): string {
  class LayoutFooterGeneric (line 361) | class LayoutFooterGeneric<
    method html (line 382) | html(opts?: IBlockTypeFilterOpts): string {
    method str (line 389) | str(): string {
  class LayoutHeaderGeneric (line 399) | class LayoutHeaderGeneric<
    method html (line 420) | html(opts?: IBlockTypeFilterOpts): string {
    method str (line 427) | str(): string {
  class LayoutKeyValueGeneric (line 437) | class LayoutKeyValueGeneric<
    method _listContentIdsInFormField (line 462) | protected _listContentIdsInFormField<TPage extends IBlockManager>(fiel...
    method _mapPageContentToFormFields (line 477) | protected _mapPageContentToFormFields(): { [blockId: string]: FieldGen...
    method iterFields (line 502) | iterFields(): Iterable<FieldGeneric<TPage>> {
    method listFields (line 513) | listFields(): FieldGeneric<TPage>[] {
    method html (line 543) | html(opts?: IBlockTypeFilterOpts): string {
    method str (line 575) | str(): string {
  class LayoutPageNumberGeneric (line 585) | class LayoutPageNumberGeneric<
    method html (line 604) | html(opts?: IBlockTypeFilterOpts): string {
    method str (line 611) | str(): string {
  class LayoutSectionHeaderGeneric (line 621) | class LayoutSectionHeaderGeneric<
    method html (line 640) | html(opts?: IBlockTypeFilterOpts): string {
    method str (line 647) | str(): string {
  class LayoutTableGeneric (line 657) | class LayoutTableGeneric<
    method _listContentIdsInTable (line 682) | protected _listContentIdsInTable<TPage extends IBlockManager>(table: T...
    method _mapPageContentToTables (line 728) | protected _mapPageContentToTables(): { [blockId: string]: TableGeneric...
    method iterTables (line 753) | iterTables(): Iterable<TableGeneric<TPage>> {
    method listTables (line 764) | listTables(): TableGeneric<TPage>[] {
    method html (line 793) | html(opts?: IBlockTypeFilterOpts): string {
    method str (line 825) | str(): string {
  class LayoutTextGeneric (line 837) | class LayoutTextGeneric<
    method html (line 856) | html(opts?: IBlockTypeFilterOpts): string {
    method str (line 866) | str(): string {
  class LayoutTitleGeneric (line 876) | class LayoutTitleGeneric<
    method html (line 895) | html(opts?: IBlockTypeFilterOpts): string {
    method str (line 902) | str(): string {
  class LayoutListGeneric (line 912) | class LayoutListGeneric<
    method html (line 950) | html(opts?: IBlockTypeFilterOpts): string {
    method iterTextLines (line 971) | iterTextLines(): Iterable<LineGeneric<TPage>> {
    method listTextLines (line 978) | listTextLines(): Array<LineGeneric<TPage>> {
    method str (line 1017) | str(): string {
    method nTextLines (line 1024) | get nTextLines(): number {
    method text (line 1030) | override get text(): string {
  type LayoutItemGeneric (line 1040) | type LayoutItemGeneric<
  class LayoutGeneric (line 1063) | class LayoutGeneric<
    method constructor (line 1074) | constructor(parentPage: TPage) {
    method nItems (line 1124) | get nItems(): number {
    method nItemsDirect (line 1133) | get nItemsDirect(): number {
    method nItemsTotal (line 1142) | get nItemsTotal(): number {
    method parentPage (line 1148) | get parentPage(): TPage {
    method text (line 1154) | get text(): string {
    method html (line 1166) | html(opts?: IRenderOpts): string {
    method iterItems (line 1176) | iterItems({
    method listItems (line 1190) | listItems({
    method str (line 1236) | str(): string {

FILE: src-js/src/query.ts
  class QueryResultGeneric (line 28) | class QueryResultGeneric<TPage extends IBlockManager>
    method constructor (line 35) | constructor(block: ApiQueryResultBlock, parentQuery: QueryInstanceGene...
    method confidence (line 41) | get confidence(): number {
    method geometry (line 47) | get geometry(): undefined | Geometry<ApiQueryResultBlock, QueryResultG...
    method parentQuery (line 50) | get parentQuery(): QueryInstanceGeneric<TPage> {
    method text (line 53) | get text(): string {
    method html (line 60) | html(opts?: IRenderOpts): string {
    method str (line 65) | str(): string {
  class QueryInstanceGeneric (line 75) | class QueryInstanceGeneric<TPage extends IBlockManager>
    method constructor (line 79) | constructor(block: ApiQueryBlock, parentPage: TPage) {
    method alias (line 104) | get alias(): string | undefined {
    method nResults (line 107) | get nResults(): number {
    method text (line 114) | get text(): string {
    method topResult (line 121) | get topResult(): QueryResultGeneric<TPage> | undefined {
    method _listResults (line 138) | protected _listResults(): QueryResultGeneric<TPage>[] {
    method listResultsByConfidence (line 153) | listResultsByConfidence(): QueryResultGeneric<TPage>[] {
    method html (line 168) | html(opts?: IRenderOpts): string {
    method str (line 187) | str(): string {
  type IFilterQueryOpts (line 197) | interface IFilterQueryOpts {
  class QueryInstanceCollectionGeneric (line 209) | class QueryInstanceCollectionGeneric<TPage extends IBlockManager> implem...
    method constructor (line 213) | constructor(queryBlocks: ApiQueryBlock[], parentPage: TPage) {
    method nQueries (line 222) | get nQueries(): number {
    method parentPage (line 225) | get parentPage(): TPage {
    method text (line 232) | get text(): string {
    method getQueryByAlias (line 249) | getQueryByAlias(alias: string): QueryInstanceGeneric<TPage> | undefined {
    method getQueryByQuestion (line 259) | getQueryByQuestion(question: string): QueryInstanceGeneric<TPage> | un...
    method html (line 267) | html(opts?: IRenderOpts): string {
    method iterQueries (line 289) | iterQueries(opts: IFilterQueryOpts = {}): Iterable<QueryInstanceGeneri...
    method listQueries (line 297) | listQueries(opts: IFilterQueryOpts = {}): QueryInstanceGeneric<TPage>[] {
    method searchQueriesByAlias (line 306) | searchQueriesByAlias(alias: string, opts: IFilterQueryOpts = {}): Quer...
    method searchQueriesByQuestion (line 317) | searchQueriesByQuestion(question: string, opts: IFilterQueryOpts = {})...
    method str (line 326) | str(): string {

FILE: src-js/src/table.ts
  class CellBaseGeneric (line 42) | class CellBaseGeneric<TBlock extends ApiCellBlock | ApiMergedCellBlock, ...
    method constructor (line 49) | constructor(block: TBlock, parentTable: TableGeneric<TPage>) {
    method geometry (line 58) | get geometry(): Geometry<TBlock, CellBaseGeneric<TBlock, TPage>> {
    method parentTable (line 64) | get parentTable(): TableGeneric<TPage> {
  type ICellBaseProps (line 72) | interface ICellBaseProps {
  function WithCellBaseProps (line 122) | function WithCellBaseProps<
  class CellGeneric (line 235) | class CellGeneric<TPage extends IBlockManager>
    method constructor (line 242) | constructor(block: ApiCellBlock, parentTable: TableGeneric<TPage>) {
  class MergedCellGeneric (line 258) | class MergedCellGeneric<TPage extends IBlockManager> extends WithCellBas...
    method constructor (line 261) | constructor(block: ApiMergedCellBlock, parentTable: TableGeneric<TPage...
    method nSubCells (line 269) | get nSubCells(): number {
    method listSubCells (line 278) | listSubCells(): CellGeneric<TPage>[] {
    method iterContent (line 282) | override iterContent(opts: IBlockTypeFilterOpts = {}): Iterable<Select...
    method listContent (line 314) | override listContent(opts: IBlockTypeFilterOpts = {}): Array<Selection...
    method iterSubCells (line 330) | iterSubCells(): Iterable<CellGeneric<TPage>> {
  class RowGeneric (line 340) | class RowGeneric<TPage extends IBlockManager> {
    method constructor (line 344) | constructor(
    method nCells (line 355) | get nCells(): number {
    method parentTable (line 361) | get parentTable(): TableGeneric<TPage> {
    method text (line 367) | get text(): string {
    method getConfidence (line 380) | getConfidence(aggMethod: AggregationMethod = AggregationMethod.Mean): ...
    method getOcrConfidence (line 396) | getOcrConfidence(aggMethod: AggregationMethod = AggregationMethod.Mean...
    method iterCells (line 414) | iterCells(): Iterable<CellGeneric<TPage> | MergedCellGeneric<TPage>> {
    method listCells (line 421) | listCells(): Array<CellGeneric<TPage> | MergedCellGeneric<TPage>> {
    method str (line 428) | str(): string {
  type IGetCellOptions (line 436) | interface IGetCellOptions {
  type IGetRowOptions (line 446) | interface IGetRowOptions {
  class TableFooterGeneric (line 463) | class TableFooterGeneric<TPage extends IBlockManager>
    method confidence (line 470) | get confidence(): number {
    method html (line 480) | html({ includeBlockTypes = null, skipBlockTypes = null }: IRenderOpts ...
    method str (line 485) | str(): string {
  class TableTitleGeneric (line 495) | class TableTitleGeneric<TPage extends IBlockManager>
    method confidence (line 502) | get confidence(): number {
    method html (line 512) | html({ includeBlockTypes = null, skipBlockTypes = null }: IRenderOpts ...
    method str (line 517) | str(): string {
  class TableGeneric (line 527) | class TableGeneric<TPage extends IBlockManager> extends PageHostedApiBlo...
    method constructor (line 538) | constructor(block: ApiTableBlock, parentPage: TPage) {
    method _sortCellsByLocation (line 601) | _sortCellsByLocation<T extends CellGeneric<TPage> | MergedCellGeneric<...
    method _updateCellsById (line 608) | _updateCellsById(): void {
    method _getSplitCellByBlockId (line 625) | _getSplitCellByBlockId(id: string): CellGeneric<TPage> {
    method cellAt (line 647) | cellAt(
    method cellsAt (line 677) | cellsAt(
    method getOcrConfidence (line 720) | getOcrConfidence(aggMethod: AggregationMethod = AggregationMethod.Mean...
    method iterFooters (line 730) | iterFooters(): Iterable<TableFooterGeneric<TPage>> {
    method iterRows (line 750) | iterRows(opts: IGetRowOptions = {}): Iterable<RowGeneric<TPage>> {
    method iterTitles (line 777) | iterTitles(): Iterable<TableTitleGeneric<TPage>> {
    method listFooters (line 784) | listFooters(): TableFooterGeneric<TPage>[] {
    method listRows (line 795) | listRows(opts: IGetRowOptions = {}): RowGeneric<TPage>[] {
    method listTitles (line 802) | listTitles(): TableTitleGeneric<TPage>[] {
    method rowAt (line 814) | rowAt(rowIndex: number, opts: IGetRowOptions = {}): RowGeneric<TPage> {
    method confidence (line 829) | get confidence(): number {
    method confidence (line 832) | set confidence(newVal: number) {
    method firstFooter (line 841) | get firstFooter(): TableFooterGeneric<TPage> | undefined {
    method firstTitle (line 851) | get firstTitle(): TableTitleGeneric<TPage> | undefined {
    method geometry (line 858) | get geometry(): Geometry<ApiTableBlock, TableGeneric<TPage>> {
    method nCells (line 866) | get nCells(): number {
    method nColumns (line 874) | get nColumns(): number {
    method nRows (line 880) | get nRows(): number {
    method tableType (line 894) | get tableType(): ApiTableEntityType | null {
    method text (line 913) | get text(): string {
    method html (line 926) | html(opts?: IRenderOpts): string {
    method str (line 990) | str(): string {
  type IWithTables (line 1003) | interface IWithTables<TPage extends IBlockManager> {

FILE: src-js/test/unit/base.test.ts
  constant PRECISION_DPS (line 26) | const PRECISION_DPS = 10;

FILE: src-js/test/unit/content.test.ts
  constant EXAMPLE_WORD_BLOCK (line 21) | const EXAMPLE_WORD_BLOCK: ApiWordBlock = {
  constant EXAMPLE_SELECT_BLOCK (line 55) | const EXAMPLE_SELECT_BLOCK: ApiSelectionElementBlock = {
  constant EXAMPLE_SIG_BLOCK (line 88) | const EXAMPLE_SIG_BLOCK: ApiSignatureBlock = {
  constant EXAMPLE_LINE_BLOCK (line 120) | const EXAMPLE_LINE_BLOCK: ApiLineBlock = {
  class DummyPage (line 165) | class DummyPage implements IBlockManager {
    method constructor (line 170) | constructor(blocks: ApiBlock[], items: any[] | undefined = undefined) {
    method parentDocument (line 175) | get parentDocument() {
    method getItemByBlockId (line 180) | getItemByBlockId(id: string): any {
    method getBlockById (line 191) | getBlockById(id: string): ApiBlock | undefined {
    method listBlocks (line 195) | listBlocks(): ApiBlock[] {
    method registerParsedItem (line 199) | registerParsedItem(blockId: string, item: IApiBlockWrapper<ApiBlock>):...
  class ErrLine (line 662) | class ErrLine extends WithWords(PageHostedApiBlockWrapper, {
  class ItemWithWords (line 701) | class ItemWithWords extends WithWords(PageHostedApiBlockWrapper)<ApiLine...

FILE: src-js/test/unit/corpus/header-footer.test.ts
  type HeaderFooterTest (line 20) | interface HeaderFooterTest {
  constant HEADER_FOOTER_TESTS (line 32) | const HEADER_FOOTER_TESTS: HeaderFooterTest[] = [
  constant HEADER_CONFIG (line 53) | const HEADER_CONFIG: HeaderFooterSegmentModelParams = {
  constant FOOTER_CONFIG (line 58) | const FOOTER_CONFIG: HeaderFooterSegmentModelParams = {
  type PageHeaderFooterTestSpec (line 64) | interface PageHeaderFooterTestSpec {
  function checkLinesAreLike (line 70) | function checkLinesAreLike(
  function checkHeaderFooters (line 103) | function checkHeaderFooters(filename: string, headerFooterSpec: PageHead...

FILE: src-js/test/unit/corpus/reading-order.test.ts
  type ReadingOrderTest (line 23) | interface ReadingOrderTest {
  constant READING_ORDER_TESTS (line 31) | const READING_ORDER_TESTS: ReadingOrderTest[] = [
  type CheckReadingOrderResult (line 48) | type CheckReadingOrderResult = {
  function checkReadingOrder (line 55) | function checkReadingOrder(filename: string, expectedDocReadingOrder: st...
  function expectReadingOrderSuccessful (line 108) | function expectReadingOrderSuccessful(result: CheckReadingOrderResult[])...

FILE: src-js/test/unit/document.test.ts
  constant EXPECTED_MULTILINE_SEQ_LOWER (line 30) | const EXPECTED_MULTILINE_SEQ_LOWER = [
  constant EXPECTED_MULTILINE_SEQ_2_LOWER (line 41) | const EXPECTED_MULTILINE_SEQ_2_LOWER = [
  function checkMultiColReadingOrder (line 58) | function checkMultiColReadingOrder(

FILE: src-js/test/unit/form.test.ts
  constant REFERENCE_FIELD_STR (line 12) | const REFERENCE_FIELD_STR = `
  function mean (line 148) | function mean(numberArr: number[]): number {

FILE: src-js/test/unit/geometry.test.ts
  constant PRECISION_DPS (line 7) | const PRECISION_DPS = 15;
  constant EXAMPLE_WORD_BLOCK (line 9) | const EXAMPLE_WORD_BLOCK: ApiWordBlock = {

FILE: src-js/test/unit/query.test.ts
  constant REFERENCE_QUERY_HTML (line 10) | const REFERENCE_QUERY_HTML = `<div class="query">
  constant REFERENCE_QUERY_STR (line 17) | const REFERENCE_QUERY_STR = `Query
  constant REFERENCE_QUERIES_HTML (line 23) | const REFERENCE_QUERIES_HTML = `<div class="queries">

FILE: src-js/test/unit/table.test.ts
  constant REFERENCE_TABLE_NO_CAPTION_HTML (line 20) | const REFERENCE_TABLE_NO_CAPTION_HTML = `<table>
  constant REFERENCE_TABLE_WITH_HEADER_HTML (line 58) | const REFERENCE_TABLE_WITH_HEADER_HTML = `<table>

FILE: src-python/a2i/a2irp.py
  class Word (line 3) | class Word:
    method __init__ (line 4) | def __init__(self, block, blockMap):
    method __str__ (line 11) | def __str__(self):
    method id (line 15) | def id(self):
    method text (line 19) | def text(self):
    method block (line 23) | def block(self):
  class FieldKey (line 26) | class FieldKey:
    method __init__ (line 27) | def __init__(self, block, children, blockMap):
    method __str__ (line 45) | def __str__(self):
    method id (line 49) | def id(self):
    method content (line 53) | def content(self):
    method text (line 57) | def text(self):
    method block (line 61) | def block(self):
  class FieldValue (line 64) | class FieldValue:
    method __init__ (line 65) | def __init__(self, block, children, blockMap):
    method __str__ (line 83) | def __str__(self):
    method id (line 87) | def id(self):
    method content (line 91) | def content(self):
    method text (line 95) | def text(self):
    method block (line 99) | def block(self):
  class Field (line 102) | class Field:
    method __init__ (line 103) | def __init__(self, block, blockMap):
    method __str__ (line 118) | def __str__(self):
    method key (line 130) | def key(self):
    method value (line 134) | def value(self):
  class Form (line 137) | class Form:
    method __init__ (line 138) | def __init__(self):
    method addField (line 142) | def addField(self, field):
    method __str__ (line 146) | def __str__(self):
    method fields (line 153) | def fields(self):
    method getFieldByKey (line 156) | def getFieldByKey(self, key):
    method searchFieldsByKey (line 162) | def searchFieldsByKey(self, key):
  class Page (line 170) | class Page:
    method __init__ (line 172) | def __init__(self, blocks, blockMap):
    method __str__ (line 181) | def __str__(self):
    method _parse (line 187) | def _parse(self, blockMap):
    method blocks (line 201) | def blocks(self):
    method text (line 205) | def text(self):
    method lines (line 209) | def lines(self):
    method form (line 213) | def form(self):
    method tables (line 217) | def tables(self):
    method content (line 221) | def content(self):
    method id (line 225) | def id(self):
  class Document (line 228) | class Document:
    method __init__ (line 230) | def __init__(self, responsePages):
    method __str__ (line 242) | def __str__(self):
    method _parseDocumentPagesAndBlockMap (line 248) | def _parseDocumentPagesAndBlockMap(self):
    method _parse (line 272) | def _parse(self):
    method blocks (line 280) | def blocks(self):
    method pageBlocks (line 284) | def pageBlocks(self):
    method pages (line 288) | def pages(self):
    method getBlockById (line 291) | def getBlockById(self, blockId):

FILE: src-python/a2i/a2irptest.py
  function processDocument (line 5) | def processDocument(doc):
  function run (line 32) | def run():

FILE: src-python/setup.py
  function read (line 6) | def read(fname):

FILE: src-python/tests/test_base_trp2.py
  function test_relationship_recursive_with_lru_cache (line 21) | def test_relationship_recursive_with_lru_cache(caplog):
  function test_selection_elements (line 33) | def test_selection_elements(caplog):

FILE: src-python/tests/test_merged.py
  function test_merged_cells (line 17) | def test_merged_cells(file_path, len_pages):

FILE: src-python/tests/test_t_tables.py
  function test_table_header_compare (line 14) | def test_table_header_compare():
  function test_execute_table_validations (line 43) | def test_execute_table_validations():

FILE: src-python/tests/test_trp.py
  function return_json_for_file (line 11) | def return_json_for_file(filename):
  function json_response (line 17) | def json_response():
  function test_words (line 21) | def test_words(json_response):
  function test_tables (line 30) | def test_tables(json_response):
  function test_forms (line 35) | def test_forms(json_response):
  function test_table_with_headers_and_merged_cells (line 40) | def test_table_with_headers_and_merged_cells(caplog):
  function test_table_with_headers_and_merged_cells_out_of_order_cells (line 54) | def test_table_with_headers_and_merged_cells_out_of_order_cells(caplog):
  function test_tables_after_sort_cells (line 72) | def test_tables_after_sort_cells():
  function _test_table_with_merged_cells (line 88) | def _test_table_with_merged_cells(datafile, expected_merged_cells):
  function test_table_with_merged_cells_1 (line 109) | def test_table_with_merged_cells_1(caplog):
  function test_table_with_merged_cells_2 (line 123) | def test_table_with_merged_cells_2(caplog):
  function test_table_with_header (line 129) | def test_table_with_header(caplog):
  function test_signature (line 146) | def test_signature(caplog):
  function test_2023_q1_table_model (line 155) | def test_2023_q1_table_model(caplog):
  function test_2023_q2_table_model (line 172) | def test_2023_q2_table_model(caplog):
  function test_issue_83 (line 193) | def test_issue_83(caplog):

FILE: src-python/tests/test_trp2.py
  function return_json_for_file (line 18) | def return_json_for_file(filename):
  function json_response (line 24) | def json_response():
  function test_serialization (line 28) | def test_serialization():
  function test_tblock_order_blocks_by_geo (line 45) | def test_tblock_order_blocks_by_geo():
  function test_tblock_order_block_by_geo_multi_page (line 57) | def test_tblock_order_block_by_geo_multi_page():
  function test_tblock_order_blocks_by_geo_x_y (line 68) | def test_tblock_order_blocks_by_geo_x_y():
  function test_tblock (line 80) | def test_tblock():
  function test_custom_tblock (line 92) | def test_custom_tblock():
  function test_custom_page_orientation (line 101) | def test_custom_page_orientation(json_response):
  function test_empty_page_orientation (line 176) | def test_empty_page_orientation():
  function test_filter_blocks_by_type (line 209) | def test_filter_blocks_by_type():
  function test_empty_page_get_blocks_by_type (line 215) | def test_empty_page_get_blocks_by_type():
  function test_next_token_response (line 252) | def test_next_token_response():
  function test_rotate_point (line 262) | def test_rotate_point():
  function test_rotate (line 296) | def test_rotate():
  function test_adjust_bounding_boxes_and_polygons_to_orientation (line 313) | def test_adjust_bounding_boxes_and_polygons_to_orientation():
  function test_scale (line 366) | def test_scale(caplog):
  function test_ratio (line 382) | def test_ratio(caplog):
  function test_tbbox_union (line 405) | def test_tbbox_union():
  function test_get_blocks_for_relationship (line 431) | def test_get_blocks_for_relationship(caplog):
  function test_block_id_map (line 458) | def test_block_id_map():
  function test_block_id_map_no_content (line 479) | def test_block_id_map_no_content():
  function test_block_map (line 517) | def test_block_map():
  function test_find_block_by_id (line 541) | def test_find_block_by_id():
  function test_get_block_by_id (line 551) | def test_get_block_by_id():
  function test_pages (line 562) | def test_pages():
  function test_pages_no_pagenums (line 571) | def test_pages_no_pagenums():
  function test_pages_out_of_order (line 615) | def test_pages_out_of_order():
  function test_add_ids_to_relationships (line 661) | def test_add_ids_to_relationships(caplog):
  function test_key_value_set_key_name (line 684) | def test_key_value_set_key_name(caplog):
  function test_get_relationships_for_type (line 704) | def test_get_relationships_for_type(caplog):
  function test_merge_tables (line 726) | def test_merge_tables():
  function test_add_block (line 748) | def test_add_block():
  function test_delete_blocks (line 766) | def test_delete_blocks():
  function test_link_tables (line 787) | def test_link_tables():
  function test_pipeline_merge_tables (line 799) | def test_pipeline_merge_tables():
  function test_pipeline_merge_multiple_tables (line 819) | def test_pipeline_merge_multiple_tables():
  function test_kv_ocr_confidence (line 839) | def test_kv_ocr_confidence(caplog):
  function test_get_answers_for_query (line 861) | def test_get_answers_for_query(caplog):
  function test_table_with_headers_and_merged_cells (line 874) | def test_table_with_headers_and_merged_cells(caplog):
  function test_bla (line 884) | def test_bla(caplog):
  function test_add_key_values_new_value_blocks (line 902) | def test_add_key_values_new_value_blocks(caplog):
  function test_add_virtual_key_for_existing_key_multi_page (line 915) | def test_add_virtual_key_for_existing_key_multi_page(caplog):
  function test_paystub_with_signature (line 952) | def test_paystub_with_signature(caplog):
  function test_2023_q1_table_model (line 961) | def test_2023_q1_table_model(caplog):
  function test_180_degree_orientation_page_and_based_on_words (line 985) | def test_180_degree_orientation_page_and_based_on_words(caplog):
  function test_large_json (line 1000) | def test_large_json(caplog):
  function test_process_tables_timing (line 1009) | def test_process_tables_timing(caplog):
  function test_tdoc_signature (line 1076) | def test_tdoc_signature(caplog):
  function test_lines_in_order (line 1086) | def test_lines_in_order(caplog):
  function test_create_geometry_from_blocks (line 1099) | def test_create_geometry_from_blocks(caplog):

FILE: src-python/tests/test_trp2_analyzeid.py
  function return_json_for_file (line 10) | def return_json_for_file(filename):
  function json_response_1 (line 16) | def json_response_1():
  function json_response_3 (line 21) | def json_response_3():
  function json_response_multi_page (line 26) | def json_response_multi_page():
  function json_response_2 (line 31) | def json_response_2():
  function test_analyzeid_serialization (line 35) | def test_analyzeid_serialization(caplog, json_response_1):
  function test_analyzeid_serialization_empty (line 45) | def test_analyzeid_serialization_empty(caplog, json_response_2):
  function test_analyzeid_serialization_multi_page (line 52) | def test_analyzeid_serialization_multi_page(caplog, json_response_multi_...
  function test_analyzeid_serialization_with_OCR (line 66) | def test_analyzeid_serialization_with_OCR(caplog, json_response_3):

FILE: src-python/tests/test_trp2_expense.py
  function return_json_for_file (line 10) | def return_json_for_file(filename):
  function json_response_1 (line 16) | def json_response_1():
  function json_response_2 (line 20) | def json_response_2():
  function test_serialization (line 23) | def test_serialization(caplog, json_response_1):
  function test_multipage (line 36) | def test_multipage(caplog, json_response_2):
  function test_generate_multipage_text (line 41) | def test_generate_multipage_text(caplog, json_response_2):

FILE: src-python/tests/test_trp2_lending.py
  function test_tblock (line 9) | def test_tblock(caplog):
  function test_tblock_no_signature (line 18) | def test_tblock_no_signature(caplog):

FILE: src-python/textract-mapping/mapping.py
  function Document (line 1) | def Document(response_pages):

FILE: src-python/textract-mapping/mappingtest.py
  function run (line 5) | def run():

FILE: src-python/trp/__init__.py
  class BaseBlock (line 17) | class BaseBlock():
    method __init__ (line 19) | def __init__(self, block, blockMap):
    method __str__ (line 33) | def __str__(self):
    method custom (line 37) | def custom(self):
    method confidence (line 41) | def confidence(self):
    method geometry (line 45) | def geometry(self):
    method id (line 49) | def id(self):
    method text (line 53) | def text(self):
    method text (line 57) | def text(self, text):
    method block (line 61) | def block(self):
    method textType (line 65) | def textType(self):
  class BoundingBox (line 69) | class BoundingBox:
    method __init__ (line 71) | def __init__(self, width, height, left, top):
    method __str__ (line 77) | def __str__(self):
    method width (line 81) | def width(self):
    method height (line 85) | def height(self):
    method left (line 89) | def left(self):
    method top (line 93) | def top(self):
  class Polygon (line 97) | class Polygon:
    method __init__ (line 99) | def __init__(self, x, y):
    method __str__ (line 103) | def __str__(self):
    method x (line 107) | def x(self):
    method y (line 111) | def y(self):
  class Geometry (line 115) | class Geometry:
    method __init__ (line 117) | def __init__(self, geometry):
    method __str__ (line 128) | def __str__(self):
    method boundingBox (line 133) | def boundingBox(self):
    method polygon (line 137) | def polygon(self):
  class Word (line 141) | class Word(BaseBlock):
    method __init__ (line 143) | def __init__(self, block, blockMap):
  class Line (line 147) | class Line(BaseBlock):
    method __init__ (line 149) | def __init__(self, block, blockMap):
    method __str__ (line 160) | def __str__(self):
    method words (line 169) | def words(self):
  class SelectionElement (line 173) | class SelectionElement:
    method __init__ (line 175) | def __init__(self, block, blockMap):
    method confidence (line 182) | def confidence(self):
    method geometry (line 186) | def geometry(self):
    method id (line 190) | def id(self):
    method selectionStatus (line 194) | def selectionStatus(self):
  class FieldKey (line 198) | class FieldKey(BaseBlock):
    method __init__ (line 200) | def __init__(self, block, children, blockMap):
    method content (line 217) | def content(self):
  class FieldValue (line 221) | class FieldValue(BaseBlock):
    method __init__ (line 223) | def __init__(self, block, children, blockMap):
    method content (line 244) | def content(self):
  class Field (line 248) | class Field(BaseBlock):
    method __init__ (line 250) | def __init__(self, block, blockMap):
    method __str__ (line 270) | def __str__(self):
    method key (line 282) | def key(self):
    method value (line 286) | def value(self):
  class Form (line 290) | class Form:
    method __init__ (line 292) | def __init__(self):
    method addField (line 296) | def addField(self, field):
    method __str__ (line 300) | def __str__(self):
    method fields (line 307) | def fields(self):
    method getFieldByKey (line 310) | def getFieldByKey(self, key):
    method searchFieldsByKey (line 316) | def searchFieldsByKey(self, key):
  class BaseCell (line 325) | class BaseCell(BaseBlock):
    method __init__ (line 327) | def __init__(self, block, blockMap):
    method rowIndex (line 338) | def rowIndex(self):
    method columnIndex (line 342) | def columnIndex(self):
    method rowSpan (line 346) | def rowSpan(self):
    method columnSpan (line 350) | def columnSpan(self):
    method content (line 354) | def content(self):
    method entityTypes (line 358) | def entityTypes(self):
  class Cell (line 363) | class Cell(BaseCell):
    method __init__ (line 365) | def __init__(self, block, blockMap):
    method mergedText (line 387) | def mergedText(self):
  class MergedCell (line 394) | class MergedCell(BaseCell):
    method __init__ (line 396) | def __init__(self, block, blockMap, rows):
  class Row (line 420) | class Row:
    method __init__ (line 422) | def __init__(self):
    method __str__ (line 425) | def __str__(self):
    method cells (line 432) | def cells(self):
    method cells (line 436) | def cells(self, cells: List[Cell]):
    method merged_cells (line 440) | def merged_cells(self):
  class Table (line 444) | class Table(BaseBlock):
    method __init__ (line 446) | def __init__(self, block, blockMap):
    method __str__ (line 469) | def __str__(self):
    method _resolve_merged_cells (line 476) | def _resolve_merged_cells(self, blockMap):
    method get_header_field_names (line 481) | def get_header_field_names(self):
    method rows (line 495) | def rows(self) -> List[Row]:
    method header (line 499) | def header(self) -> List[List[Cell]]:
    method rows_without_header (line 513) | def rows_without_header(self) -> List[Row]:
    method merged_cells (line 526) | def merged_cells(self) -> List[MergedCell]:
  class Page (line 530) | class Page:
    method __init__ (line 532) | def __init__(self, blocks, blockMap):
    method __str__ (line 543) | def __str__(self):
    method _parse (line 549) | def _parse(self, blockMap):
    method getLinesInReadingOrder (line 576) | def getLinesInReadingOrder(self):
    method getTextInReadingOrder (line 602) | def getTextInReadingOrder(self):
    method blocks (line 610) | def blocks(self):
    method text (line 614) | def text(self):
    method lines (line 618) | def lines(self):
    method form (line 622) | def form(self):
    method tables (line 626) | def tables(self):
    method content (line 630) | def content(self):
    method geometry (line 634) | def geometry(self):
    method id (line 638) | def id(self):
    method custom (line 642) | def custom(self):
  class Document (line 646) | class Document:
    method __init__ (line 648) | def __init__(self, responsePages):
    method __str__ (line 660) | def __str__(self):
    method _parseDocumentPagesAndBlockMap (line 666) | def _parseDocumentPagesAndBlockMap(self):
    method _parse (line 691) | def _parse(self):
    method blocks (line 699) | def blocks(self):
    method pageBlocks (line 703) | def pageBlocks(self):
    method pages (line 707) | def pages(self):
    method getBlockById (line 710) | def getBlockById(self, blockId):

FILE: src-python/trp/t_pipeline.py
  function order_blocks_by_geo (line 10) | def order_blocks_by_geo(t_document: t2.TDocument) -> t2.TDocument:
  function order_blocks_by_geo_x_y (line 28) | def order_blocks_by_geo_x_y(t_document: t2.TDocument) -> t2.TDocument:
  function add_kv_ocr_confidence (line 80) | def add_kv_ocr_confidence(t_document: t2.TDocument) -> t2.TDocument:
  function __get_degree_from_polygon (line 113) | def __get_degree_from_polygon(poly: List[t2.TPoint] = None) -> float:
  function add_orientation_to_blocks (line 124) | def add_orientation_to_blocks(t_document: t2.TDocument) -> t2.TDocument:
  function add_page_orientation (line 136) | def add_page_orientation(t_document: t2.TDocument) -> t2.TDocument:
  function add_image_size (line 156) | def add_image_size(t_document: t2.TDocument) -> t2.TDocument:
  function rotate_points_to_page_orientation (line 159) | def rotate_points_to_page_orientation(t_document:t2.TDocument)->t2.TDocu...
  function pipeline_merge_tables (line 175) | def pipeline_merge_tables(t_document: t2.TDocument,

FILE: src-python/trp/t_tables.py
  class MergeOptions (line 9) | class MergeOptions(Enum):
  class HeaderFooterType (line 14) | class HeaderFooterType(Enum):
  function __validate_objects_between_tables (line 23) | def __validate_objects_between_tables(page1, page1_table, page2, page2_t...
  function __compare_table_column_numbers (line 39) | def __compare_table_column_numbers(table_1, table_2):
  function __compare_table_headers (line 51) | def __compare_table_headers(table_1, table_2):
  function __calculate_percentage_difference (line 60) | def __calculate_percentage_difference(measure_1, measure_2):
  function __compare_table_dimensions (line 70) | def __compare_table_dimensions(table_1, table_2, accuracy_percentage):
  function ExecuteTableValidations (line 83) | def ExecuteTableValidations(t_doc: t2.TDocument, header_footer_type: Hea...

FILE: src-python/trp/trp2.py
  class BaseSchema (line 22) | class BaseSchema(m.Schema):
    method remove_skip_values (line 30) | def remove_skip_values(self, data, many, pass_many=False):
  class TextractBlockTypes (line 38) | class TextractBlockTypes(Enum):
  class TextractEntityTypes (line 65) | class TextractEntityTypes(Enum):
  class TPoint (line 71) | class TPoint():
    method __init__ (line 75) | def __init__(self, x: float, y: float) -> None:
    method scale (line 79) | def scale(self, doc_width, doc_height):
    method ratio (line 83) | def ratio(self, doc_width, doc_height):
    method to_list (line 87) | def to_list(self) -> List[float]:
    method rotate (line 95) | def rotate(self,
  class TBoundingBox (line 122) | class TBoundingBox():
    method __init__ (line 128) | def __init__(self, height: float, width: float, left: float, top: floa...
    method scale (line 134) | def scale(self, doc_width, doc_height):
    method ratio (line 140) | def ratio(self, doc_width, doc_height):
    method points (line 147) | def points(self) -> List[TPoint]:
    method bottom (line 156) | def bottom(self) -> float:
    method right (line 160) | def right(self) -> float:
    method centre (line 164) | def centre(self) -> TPoint:
    method to_list (line 170) | def to_list(self) -> List[float]:
    method union (line 179) | def union(self, bbox: TBoundingBox) -> TBoundingBox:
    method rotate (line 211) | def rotate(self, origin: TPoint = TPoint(0, 0), degrees: float = 180) ...
  class TBoundingBoxSchema (line 242) | class TBoundingBoxSchema(BaseSchema):
    method make_tbounding_box (line 249) | def make_tbounding_box(self, data, **kwargs):
  class TPointSchema (line 253) | class TPointSchema(BaseSchema):
    method make_tpoint (line 258) | def make_tpoint(self, data, **kwargs):
  class TGeometry (line 263) | class TGeometry():
    method ratio (line 267) | def ratio(self, doc_width=None, doc_height=None):
    method rotate (line 271) | def rotate(self, origin: TPoint = TPoint(0, 0), degrees: float = 180.0):
    method scale (line 275) | def scale(self, doc_width=None, doc_height=None):
  class TGeometrySchema (line 280) | class TGeometrySchema(BaseSchema):
    method make_tgeometry (line 285) | def make_tgeometry(self, data, **kwargs):
  class TQuery (line 290) | class TQuery:
  class TQuerySchema (line 295) | class TQuerySchema(BaseSchema):
    method make_tquery (line 300) | def make_tquery(self, data, **kwargs):
  class TRelationship (line 305) | class TRelationship():
  class TRelationshipSchema (line 310) | class TRelationshipSchema(BaseSchema):
    method make_trelationship (line 315) | def make_trelationship(self, data, **kwargs):
  class TBlock (line 320) | class TBlock():
    method __eq__ (line 342) | def __eq__(self, o: object) -> bool:
    method __hash__ (line 347) | def __hash__(self) -> int:
    method get_relationships_for_type (line 350) | def get_relationships_for_type(self, relationship_type="CHILD") -> Opt...
    method add_ids_to_relationships (line 358) | def add_ids_to_relationships(self, ids: List[str], relationships_type:...
    method rotate (line 373) | def rotate(self, origin=TPoint(0.5, 0.5), degrees: float = 180):
  class TBlockSchema (line 377) | class TBlockSchema(BaseSchema):
    method make_tblock (line 396) | def make_tblock(self, data, **kwargs):
  class TDocumentMetadata (line 401) | class TDocumentMetadata():
  class TDocumentMetadataSchema (line 405) | class TDocumentMetadataSchema(BaseSchema):
    method make_tdocument_metadat (line 409) | def make_tdocument_metadat(self, data, **kwargs):
  class TWarnings (line 414) | class TWarnings():
  class TWarningsSchema (line 419) | class TWarningsSchema(BaseSchema):
    method make_twarnings (line 424) | def make_twarnings(self, data, **kwargs):
  class THttpHeaders (line 429) | class THttpHeaders():
  class TResponseMetadata (line 438) | class TResponseMetadata():
  class TDocument (line 446) | class TDocument():
    method __post_init__ (line 460) | def __post_init__(self):    #this is a dataclass method
    method __hash__ (line 491) | def __hash__(self):
    method block_id_map (line 494) | def block_id_map(self, block_type: Optional[TextractBlockTypes] = None...
    method block_map (line 504) | def block_map(self, block_type: Optional[TextractBlockTypes] = None) -...
    method add_block (line 513) | def add_block(self, block: TBlock, page: TBlock = None):    #type: ignore
    method create_geometry_from_blocks (line 538) | def create_geometry_from_blocks(values: List[TBlock]) -> Optional[TGeo...
    method create_value_block (line 552) | def create_value_block(values: List[TBlock]) -> TBlock:
    method add_virtual_block (line 561) | def add_virtual_block(self, text: str, page_block: TBlock, text_type="...
    method add_virtual_key_for_existing_key (line 572) | def add_virtual_key_for_existing_key(self, key_name: str, existing_key...
    method add_key_values (line 581) | def add_key_values(self, key_name: str, values: List[TBlock], page_blo...
    method rotate (line 614) | def rotate(self, page: TBlock, degrees: float, origin: TPoint = TPoint...
    method find_block_by_id (line 624) | def find_block_by_id(self, id: str) -> Optional[TBlock]:
    method get_block_by_id (line 631) | def get_block_by_id(self, id: str) -> TBlock:
    method __relationships_recursive (line 638) | def __relationships_recursive(self, block: TBlock) -> Iterator[TBlock]:
    method relationships_recursive (line 650) | def relationships_recursive(self, block: TBlock) -> Set[TBlock]:
    method pages (line 654) | def pages(self) -> List[TBlock]:
    method filter_blocks_by_type (line 667) | def filter_blocks_by_type(block_list: List[TBlock],
    method get_child_relations (line 676) | def get_child_relations(self, page: TBlock):
    method tables (line 680) | def tables(self, page: TBlock) -> List[TBlock]:
    method get_blocks_by_type (line 683) | def get_blocks_by_type(
    method forms (line 707) | def forms(self, page: TBlock = None) -> List[TBlock]:    #type: ignore
    method keys (line 710) | def keys(self, page: TBlock = None) -> List[TBlock]:    #type: ignore
    method signatures (line 713) | def signatures(self, page: TBlock) -> List[TBlock]:
    method queries (line 716) | def queries(self, page: TBlock) -> List[TBlock]:
    method get_answers_for_query (line 719) | def get_answers_for_query(self, block: TBlock) -> List[TBlock]:
    method get_query_answers (line 727) | def get_query_answers(self, page: TBlock) -> List[List[str]]:
    method get_key_by_name (line 738) | def get_key_by_name(self, key_name: str) -> List[TBlock]:
    method get_blocks_for_relationships (line 749) | def get_blocks_for_relationships(self, relationship: TRelationship = N...
    method value_for_key (line 756) | def value_for_key(self, key: TBlock) -> List[TBlock]:
    method get_text_for_tblocks (line 769) | def get_text_for_tblocks(tblocks: List[TBlock]) -> str:
    method lines (line 774) | def lines(self, page: TBlock) -> List[TBlock]:
    method delete_blocks (line 782) | def delete_blocks(self, block_id: List[str]):
    method merge_tables (line 791) | def merge_tables(self, table_array_ids: List[List[str]]):
    method link_tables (line 822) | def link_tables(self, table_array_ids: List[List[str]]):
  class THttpHeadersSchema (line 841) | class THttpHeadersSchema(BaseSchema):
    class Meta (line 843) | class Meta:
    method make_thttp_headers (line 853) | def make_thttp_headers(self, data, **kwargs):
  class TResponseMetadataSchema (line 857) | class TResponseMetadataSchema(BaseSchema):
    class Meta (line 859) | class Meta:
    method make_tresponse_metadata (line 868) | def make_tresponse_metadata(self, data, **kwargs):
  class TDocumentSchema (line 872) | class TDocumentSchema(BaseSchema):
    class Meta (line 874) | class Meta:
    method make_tdocument (line 899) | def make_tdocument(self, data, **kwargs):

FILE: src-python/trp/trp2_analyzeid.py
  class TType (line 14) | class TType():
  class TTypeSchema (line 21) | class TTypeSchema(BaseSchema):
    method make_ttype (line 28) | def make_ttype(self, data, **kwargs):
  class TNormalizedValue (line 33) | class TNormalizedValue():
  class TNormalizedValueSchema (line 41) | class TNormalizedValueSchema(BaseSchema):
    method make_tnormalizedtype (line 49) | def make_tnormalizedtype(self, data, **kwargs):
  class TValueDetection (line 54) | class TValueDetection():
  class TValueDetectionSchema (line 63) | class TValueDetectionSchema(BaseSchema):
    method make_tvaluedetection (line 75) | def make_tvaluedetection(self, data, **kwargs):
  class TIdentityDocumentField (line 80) | class TIdentityDocumentField():
  class TIdentityDocumentFieldSchema (line 88) | class TIdentityDocumentFieldSchema(BaseSchema):
    method make_tidentitydocumentfield (line 101) | def make_tidentitydocumentfield(self, data, **kwargs):
  class TIdentityDocument (line 106) | class TIdentityDocument():
  class TIdentityDocumentSchema (line 115) | class TIdentityDocumentSchema(BaseSchema):
    method make_tidentitydocumentfield (line 129) | def make_tidentitydocumentfield(self, data, **kwargs):
  class TAnalyzeIdDocument (line 134) | class TAnalyzeIdDocument():
    method get_values_as_list (line 148) | def get_values_as_list(self) -> List[List[str]]:
  class TAnalyzeIdDocumentSchema (line 173) | class TAnalyzeIdDocumentSchema(BaseSchema):
    method make_tanalyzeiddocument (line 198) | def make_tanalyzeiddocument(self, data, **kwargs):

FILE: src-python/trp/trp2_expense.py
  class TextractAnalyzeExpenseSummaryFieldType (line 72) | class TextractAnalyzeExpenseSummaryFieldType(Enum):
  class TLabelDetection (line 80) | class TLabelDetection():
  class TLabelDetectionSchema (line 89) | class TLabelDetectionSchema(BaseSchema):
    method make_tlabeldetection (line 98) | def make_tlabeldetection(self, data, **kwargs):
  class TValueDetection (line 103) | class TValueDetection():
  class TValueDetectionSchema (line 112) | class TValueDetectionSchema(BaseSchema):
    method make_tvaluedetection (line 121) | def make_tvaluedetection(self, data, **kwargs):
  class TFieldType (line 126) | class TFieldType():
  class TFieldTypeSchema (line 134) | class TFieldTypeSchema(BaseSchema):
    method make_tfieldtype (line 142) | def make_tfieldtype(self, data, **kwargs):
  class TGroupProperty (line 147) | class TGroupProperty():
  class GroupProperty (line 152) | class GroupProperty(BaseSchema):
    method make (line 157) | def make(self, data, **kwargs):
  class TSummaryField (line 162) | class TSummaryField():
  class TSummaryFieldSchema (line 173) | class TSummaryFieldSchema(BaseSchema):
    class Meta (line 177) | class Meta:
    method make_tsummaryfield (line 190) | def make_tsummaryfield(self, data, **kwargs):
  class TExpenseField (line 195) | class TExpenseField():
  class TExpenseFieldSchema (line 205) | class TExpenseFieldSchema(BaseSchema):
    method make_texpensefield (line 217) | def make_texpensefield(self, data, **kwargs):
  class TLineItem (line 222) | class TLineItem():
  class TLineItemSchema (line 229) | class TLineItemSchema(BaseSchema):
    method make_tlineitem (line 240) | def make_tlineitem(self, data, **kwargs):
  class TLineItemGroup (line 245) | class TLineItemGroup():
  class TLineItemGroupSchema (line 253) | class TLineItemGroupSchema(BaseSchema):
    method make_tlineitemgroup (line 263) | def make_tlineitemgroup(self, data, **kwargs):
  class TExpense (line 268) | class TExpense():
    method lines (line 278) | def lines(self, page: TBlock) -> List[TBlock]:
  class TExpenseSchema (line 287) | class TExpenseSchema(BaseSchema):
    class Meta (line 291) | class Meta:
    method make_texpense (line 312) | def make_texpense(self, data, **kwargs):
  class TAnalyzeExpenseDocument (line 316) | class TAnalyzeExpenseDocument():
    method __init__ (line 321) | def __init__(self,
    method document_metadata (line 342) | def document_metadata(self):
    method expenses_documents (line 346) | def expenses_documents(self):
    method expenses_documents (line 350) | def expenses_documents(self, value: List[TExpense]):
    method analyze_expense_model_version (line 354) | def analyze_expense_model_version(self):
    method status_message (line 358) | def status_message(self):
    method warnings (line 362) | def warnings(self):
    method job_status (line 366) | def job_status(self):
    method response_metadata (line 370) | def response_metadata(self):
    method next_token (line 374) | def next_token(self):
    method custom (line 378) | def custom(self):
    method custom (line 382) | def custom(self, value: dict):
    method get_expensedocument_by_id (line 385) | def get_expensedocument_by_id(self, docid: int) -> Optional[TExpense]:
    method get_all_summaryfields_by_expense_id (line 398) | def get_all_summaryfields_by_expense_id(self, docid: int) -> Optional[...
    method get_normalized_summaryfields_by_expense_id (line 414) | def get_normalized_summaryfields_by_expense_id(self, docid: int) -> Op...
  class TAnalyzeExpenseDocumentSchema (line 432) | class TAnalyzeExpenseDocumentSchema(BaseSchema):
    class Meta (line 436) | class Meta:
    method make_tdocument (line 459) | def make_tdocument(self, data, **kwargs):

FILE: src-python/trp/trp2_lending.py
  class TLendingDetection (line 12) | class TLendingDetection():
  class TSignatureDetection (line 20) | class TSignatureDetection():
  class TLendingField (line 26) | class TLendingField():
  class TLendingDocument (line 33) | class TLendingDocument():
  class TExtraction (line 39) | class TExtraction():
  class TPrediction (line 46) | class TPrediction():
  class TPageClassification (line 52) | class TPageClassification():
  class TLendingResult (line 58) | class TLendingResult():
  class TFullLendingDocument (line 65) | class TFullLendingDocument():
  class TLendingDetectionSchema (line 82) | class TLendingDetectionSchema(BaseSchema):
    method make (line 89) | def make(self, data, **kwargs):
  class TSignatureDetectionSchema (line 93) | class TSignatureDetectionSchema(BaseSchema):
    method make (line 98) | def make(self, data, **kwargs):
  class TLendingFieldSchema (line 102) | class TLendingFieldSchema(BaseSchema):
    method make (line 110) | def make(self, data, **kwargs):
  class TLendingDocumentSchema (line 114) | class TLendingDocumentSchema(BaseSchema):
    method make (line 121) | def make(self, data, **kwargs):
  class TExtractionSchema (line 125) | class TExtractionSchema(BaseSchema):
    method make (line 137) | def make(self, data, **kwargs):
  class TPredictionSchema (line 141) | class TPredictionSchema(BaseSchema):
    method make (line 146) | def make(self, data, **kwargs):
  class TPageClassificationSchema (line 150) | class TPageClassificationSchema(BaseSchema):
    method make (line 155) | def make(self, data, **kwargs):
  class TLendingResultSchema (line 159) | class TLendingResultSchema(BaseSchema):
    method make (line 168) | def make(self, data, **kwargs):
  class TFullLendingDocumentSchema (line 172) | class TFullLendingDocumentSchema(BaseSchema):
    method make (line 195) | def make(self, data, **kwargs):
Copy disabled (too large) Download .json
Condensed preview — 178 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (21,081K chars).
[
  {
    "path": ".flake8",
    "chars": 28,
    "preview": "[flake8]\nignore = E501,W503\n"
  },
  {
    "path": ".github/PULL_REQUEST_TEMPLATE.md",
    "chars": 197,
    "preview": "*Issue #, if available:*\n\n*Description of changes:*\n\n\nBy submitting this pull request, I confirm that you can use, modif"
  },
  {
    "path": ".github/workflows/test_pull_request.yml",
    "chars": 1019,
    "preview": "# Controls when the action will run. Triggers the workflow on push or pull request\n# events but only for the main branch"
  },
  {
    "path": ".idea/amazon-textract-response-parser.iml",
    "chars": 665,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<module type=\"PYTHON_MODULE\" version=\"4\">\n  <component name=\"NewModuleRootManager"
  },
  {
    "path": ".idea/inspectionProfiles/profiles_settings.xml",
    "chars": 174,
    "preview": "<component name=\"InspectionProjectProfileManager\">\n  <settings>\n    <option name=\"USE_PROJECT_PROFILE\" value=\"false\" />\n"
  },
  {
    "path": ".idea/misc.xml",
    "chars": 192,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n  <component name=\"ProjectRootManager\" version=\"2\" project-"
  },
  {
    "path": ".idea/modules.xml",
    "chars": 314,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n  <component name=\"ProjectModuleManager\">\n    <modules>\n   "
  },
  {
    "path": ".idea/vcs.xml",
    "chars": 180,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n  <component name=\"VcsDirectoryMappings\">\n    <mapping dire"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "chars": 309,
    "preview": "## Code of Conduct\nThis project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-condu"
  },
  {
    "path": "CONTRIBUTING.md",
    "chars": 3659,
    "preview": "# Contributing Guidelines\n\nThank you for your interest in contributing to our project. Whether it's a bug report, new fe"
  },
  {
    "path": "LICENSE",
    "chars": 10142,
    "preview": "\n                                 Apache License\n                           Version 2.0, January 2004\n                  "
  },
  {
    "path": "README.md",
    "chars": 2384,
    "preview": "# Textract Response Parser\n\nYou can use Textract response parser library to easily parse JSON returned by Amazon Textrac"
  },
  {
    "path": "src-csharp/LICENSE",
    "chars": 930,
    "preview": "Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.\n\nPermission is hereby granted, free of charge, t"
  },
  {
    "path": "src-csharp/Program.cs",
    "chars": 3845,
    "preview": "using System;\r\nusing Microsoft.Extensions.Configuration;\r\nusing Amazon.Textract;\r\nusing Amazon.Textract.Model;\r\nusing S"
  },
  {
    "path": "src-csharp/README.md",
    "chars": 1652,
    "preview": "# Usage\n\n## Forms\n\n```csharp\ndocument.Pages.ForEach(page => {\n    Console.WriteLine(\"Print Lines and Words:\");\n    page."
  },
  {
    "path": "src-csharp/TextractExtensions.cs",
    "chars": 14785,
    "preview": "using System;\nusing System.Collections.Generic;\n\nnamespace Amazon.Textract.Model {\n\n\tpublic class Word {\n\t\tpublic Word(B"
  },
  {
    "path": "src-csharp/appsettings.json",
    "chars": 71,
    "preview": "{\n  \"AWS\": {\n    \"Profile\": \"default\",\n    \"Region\": \"us-west-2\"\n  }\n}\n"
  },
  {
    "path": "src-csharp/parser.csproj",
    "chars": 945,
    "preview": "<Project Sdk=\"Microsoft.NET.Sdk\">\r\n\r\n  <PropertyGroup>\r\n    <OutputType>Exe</OutputType>\r\n    <TargetFramework>netcoreap"
  },
  {
    "path": "src-js/.eslintrc.js",
    "chars": 1183,
    "preview": "module.exports = {\n  parser: \"@typescript-eslint/parser\", // Specifies the ESLint parser\n  parserOptions: {\n    ecmaVers"
  },
  {
    "path": "src-js/.nvmrc",
    "chars": 13,
    "preview": "lts/hydrogen\n"
  },
  {
    "path": "src-js/.prettierrc.js",
    "chars": 41,
    "preview": "module.exports = {\n  printWidth: 110,\n};\n"
  },
  {
    "path": "src-js/CHANGELOG.md",
    "chars": 9931,
    "preview": "# Changelog\n\n## 0.4.3 (2024-11-19)\n### Changed\n- Bumped dev dependencies (including `cross-spawn`, `lint-staged`, `rollu"
  },
  {
    "path": "src-js/README.md",
    "chars": 23599,
    "preview": "# Textract Response Parser for JavaScript/TypeScript\n\nThis library loads [Amazon Textract](https://docs.aws.amazon.com/t"
  },
  {
    "path": "src-js/bin/reading-order-diagnostic.js",
    "chars": 1966,
    "preview": "/**\n * Basic script to extract and save reading-order text from Amazon Textract JSONs.\n *\n * This script uses the built "
  },
  {
    "path": "src-js/examples/README.md",
    "chars": 4510,
    "preview": "# Examples for TRP.js\n\nThis folder contains example projects using the Amazon Textract Response Parser for JavaScript/Ty"
  },
  {
    "path": "src-js/examples/browser-iife/main.html",
    "chars": 1971,
    "preview": "<!DOCTYPE html>\n<html>\n  <head>\n    <!--\n      Add a <script> tag to your page to load the IIFE version of TRP.js\n      "
  },
  {
    "path": "src-js/examples/browser-iife/main.js",
    "chars": 2586,
    "preview": "/**\n * Browser Javascript for main.html to demonstrate using TRP.js via IIFE <script> tag.\n *\n * This script assumes the"
  },
  {
    "path": "src-js/examples/browser-iife/package.json",
    "chars": 477,
    "preview": "{\n  \"name\": \"@amazon-textract-response-parser-examples/browser-iife\",\n  \"version\": \"0.0.1\",\n  \"description\": \"Example of"
  },
  {
    "path": "src-js/examples/browser-iife/test.js",
    "chars": 2194,
    "preview": "/**\n * Script to test main.html using headless Chrome browser via Puppeteer\n *\n * This test script opens the HTML file i"
  },
  {
    "path": "src-js/examples/nodejs-import/main.js",
    "chars": 2704,
    "preview": "/**\n * Example script using TRP.js from NodeJS with ES-style module imports\n *\n * This script shows how you can get star"
  },
  {
    "path": "src-js/examples/nodejs-import/package.json",
    "chars": 608,
    "preview": "{\n  \"name\": \"@amazon-textract-response-parser-examples/nodejs-import\",\n  \"version\": \"0.0.1\",\n  \"description\": \"Example o"
  },
  {
    "path": "src-js/examples/nodejs-require/main.js",
    "chars": 2729,
    "preview": "/**\n * Example script using TRP.js from NodeJS with CommonJS-style module `require()`s\n *\n * This script shows how you c"
  },
  {
    "path": "src-js/examples/nodejs-require/package.json",
    "chars": 516,
    "preview": "{\n  \"name\": \"@amazon-textract-response-parser-examples/nodejs-require\",\n  \"version\": \"0.0.1\",\n  \"description\": \"Example "
  },
  {
    "path": "src-js/examples/nodejs-typescript/package.json",
    "chars": 695,
    "preview": "{\n  \"name\": \"@amazon-textract-response-parser-examples/nodejs-typescript\",\n  \"version\": \"0.0.1\",\n  \"description\": \"Examp"
  },
  {
    "path": "src-js/examples/nodejs-typescript/src/main.ts",
    "chars": 3128,
    "preview": "/**\n * Example script using TRP.js from NodeJS with ES-style module imports in TypeScript\n *\n * This script shows how yo"
  },
  {
    "path": "src-js/examples/nodejs-typescript/tsconfig.json",
    "chars": 451,
    "preview": "{\n  \"compilerOptions\": {\n    \"esModuleInterop\": true,\n    \"forceConsistentCasingInFileNames\": true,\n    \"lib\": [\"es2016\""
  },
  {
    "path": "src-js/jest.config.js",
    "chars": 185,
    "preview": "module.exports = {\n  preset: \"ts-jest\",\n  testEnvironment: \"node\",\n  // `examples` packages define their own test comman"
  },
  {
    "path": "src-js/package.json",
    "chars": 3602,
    "preview": "{\n  \"name\": \"amazon-textract-response-parser\",\n  \"version\": \"0.4.3\",\n  \"description\": \"Parse API responses from Amazon T"
  },
  {
    "path": "src-js/rollup.config.mjs",
    "chars": 606,
    "preview": "// External Dependencies:\nimport resolve from \"@rollup/plugin-node-resolve\";\nimport typescript from \"@rollup/plugin-type"
  },
  {
    "path": "src-js/src/api-models/base.ts",
    "chars": 10281,
    "preview": "/**\n * Common Textract API models used by (multiple features in) the Textract Response Parser.\n *\n * This file collects "
  },
  {
    "path": "src-js/src/api-models/content.ts",
    "chars": 3389,
    "preview": "/**\n * Low-level content Textract API models used by the Textract Response Parser.\n *\n * This file collects types/interf"
  },
  {
    "path": "src-js/src/api-models/document.ts",
    "chars": 4132,
    "preview": "/**\n * Document processing Textract API models used by the Textract Response Parser.\n *\n * This file collects types/inte"
  },
  {
    "path": "src-js/src/api-models/expense.ts",
    "chars": 1133,
    "preview": "/**\n * Expense analysis Textract API models used by the Textract Response Parser.\n *\n * This file collects types/interfa"
  },
  {
    "path": "src-js/src/api-models/form.ts",
    "chars": 3701,
    "preview": "/**\n * Form data (key-value pairs) analysis Textract API models used by the Textract Response Parser.\n *\n * This file co"
  },
  {
    "path": "src-js/src/api-models/geometry.ts",
    "chars": 1537,
    "preview": "/**\n * Core geometry/shape API models used by the Textract Response Parser.\n */\n\n/**\n * An axis-aligned bounding box on "
  },
  {
    "path": "src-js/src/api-models/id.ts",
    "chars": 1174,
    "preview": "/**\n * Identity document analysis Textract API models used by the Textract Response Parser.\n *\n * This file collects typ"
  },
  {
    "path": "src-js/src/api-models/index.ts",
    "chars": 2477,
    "preview": "/**\n * Amazon Textract API models (TypeScript interfaces) used by the response parser.\n\n * While these models should cor"
  },
  {
    "path": "src-js/src/api-models/layout.ts",
    "chars": 4004,
    "preview": "/**\n * Layout analysis Textract API models used by the Textract response parser.\n *\n * This file collects types/interfac"
  },
  {
    "path": "src-js/src/api-models/query.ts",
    "chars": 2205,
    "preview": "/**\n * Queries analysis Textract API models used by the Textract Response Parser.\n *\n * This file collects types/interfa"
  },
  {
    "path": "src-js/src/api-models/response.ts",
    "chars": 4456,
    "preview": "/**\n * Top-level API response models used by the Textract Response Parser.\n *\n * These models/interfaces cover the top-l"
  },
  {
    "path": "src-js/src/api-models/table.ts",
    "chars": 5269,
    "preview": "/**\n * Table analysis Textract API models used by the Textract Response Parser.\n *\n * This file collects types/interface"
  },
  {
    "path": "src-js/src/base.ts",
    "chars": 28401,
    "preview": "/**\n * Common shared utilities, interfaces, etc.\n */\n\n// Local Dependencies:\nimport { ApiBlockType, ApiRelationshipType "
  },
  {
    "path": "src-js/src/content.ts",
    "chars": 21388,
    "preview": "/**\n * TRP classes for (generic document) low-level content objects\n */\n\n// Local Dependencies:\nimport { ApiBlockType, A"
  },
  {
    "path": "src-js/src/document.ts",
    "chars": 68133,
    "preview": "/**\n * TRP classes for standard document/OCR results (e.g. DetectText and AnalyzeDocument)\n */\n\n// Local Dependencies:\ni"
  },
  {
    "path": "src-js/src/expense.ts",
    "chars": 9363,
    "preview": "/**\n * TRP classes for expense API results (e.g. AnalyzeExpense)\n */\n\n// Local Dependencies:\nimport {\n  ApiExpenseCompon"
  },
  {
    "path": "src-js/src/form.ts",
    "chars": 21026,
    "preview": "/**\n * TRP classes for (generic document) key-value form objects\n */\n\n// Local Dependencies:\nimport { ApiBlockType, ApiR"
  },
  {
    "path": "src-js/src/geometry.ts",
    "chars": 7530,
    "preview": "/**\n * TRP classes for objects describing geometry (of e.g. words, text lines, tables) on the page\n */\n// Local Dependen"
  },
  {
    "path": "src-js/src/id.ts",
    "chars": 8406,
    "preview": "/**\n * TRP classes for identity document API results (e.g. AnalyzeID)\n */\n\n// Local Dependencies:\nimport { ApiIdentityDo"
  },
  {
    "path": "src-js/src/index.ts",
    "chars": 1818,
    "preview": "/**\n * Amazon Textract Response Parser (JS/TS) main entry point\n */\n\n// api-models/index.ts handles filtering for this s"
  },
  {
    "path": "src-js/src/layout.ts",
    "chars": 41046,
    "preview": "/**\n * TRP classes for (generic document) layout analysis objects\n *\n * See: https://docs.aws.amazon.com/textract/latest"
  },
  {
    "path": "src-js/src/query.ts",
    "chars": 10977,
    "preview": "/**\n * TRP classes for (generic document) query objects\n *\n * See: https://docs.aws.amazon.com/textract/latest/dg/queryr"
  },
  {
    "path": "src-js/src/table.ts",
    "chars": 34558,
    "preview": "/**\n * TRP classes for (generic document) table objects\n */\n\n// Local Dependencies:\nimport { ApiBlockType, ApiRelationsh"
  },
  {
    "path": "src-js/test/data/analyzeid-test-drivers-license-response.json",
    "chars": 4762,
    "preview": "{\n  \"Comment\": \"AnalyzeId response for synthetic example drivers license per the Amazon Textract console\",\n  \"IdentityDo"
  },
  {
    "path": "src-js/test/data/analyzeid-test-passport-response.json",
    "chars": 4693,
    "preview": "{\n  \"Comment\": \"AnalyzeId response for synthetic example passport per the Amazon Textract console\",\n  \"IdentityDocuments"
  },
  {
    "path": "src-js/test/data/expense-missing-geoms-response.json",
    "chars": 435,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 1\n  },\n  \"ExpenseDocuments\": [\n    {\n      \"ExpenseIndex\": 1,\n      \"LineItemGrou"
  },
  {
    "path": "src-js/test/data/financial-document-response.json",
    "chars": 578909,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 1\n  },\n  \"Blocks\": [\n    {\n      \"BlockType\": \"PAGE\",\n      \"Geometry\": {\n       "
  },
  {
    "path": "src-js/test/data/form1005-response.json",
    "chars": 991289,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 1\n  },\n  \"Blocks\": [\n    {\n      \"BlockType\": \"PAGE\",\n      \"Geometry\": {\n       "
  },
  {
    "path": "src-js/test/data/invoice-expense-response.json",
    "chars": 93008,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 1\n  },\n  \"ExpenseDocuments\": [\n    {\n      \"ExpenseIndex\": 1,\n      \"LineItemGrou"
  },
  {
    "path": "src-js/test/data/paystub-response.json",
    "chars": 646809,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 1\n  },\n  \"Blocks\": [\n    {\n      \"BlockType\": \"PAGE\",\n      \"Geometry\": {\n       "
  },
  {
    "path": "src-js/test/data/table-example-response.json",
    "chars": 87510,
    "preview": "{\n  \"Comment\": \"AnalyzeDocument response for sample table with merged cells shown on Textract Developer Guide: https://d"
  },
  {
    "path": "src-js/test/data/test-failed-response.json",
    "chars": 475,
    "preview": "{\n  \"JobStatus\": \"FAILED\",\n  \"StatusMessage\": \"Dummy Status Message\",\n  \"ResponseMetadata\": {\n    \"RequestId\": \"24bf72b5"
  },
  {
    "path": "src-js/test/data/test-inprogress-response.json",
    "chars": 491,
    "preview": "{\n  \"JobStatus\": \"IN_PROGRESS\",\n  \"StatusMessage\": \"Dummy still in progress message\",\n  \"ResponseMetadata\": {\n    \"Reque"
  },
  {
    "path": "src-js/test/data/test-multicol-response-2.json",
    "chars": 125334,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 1\n  },\n  \"Blocks\": [\n    {\n      \"BlockType\": \"PAGE\",\n      \"Geometry\": {\n       "
  },
  {
    "path": "src-js/test/data/test-multicol-response.json",
    "chars": 132423,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 1\n  },\n  \"Blocks\": [\n    {\n      \"BlockType\": \"PAGE\",\n      \"Geometry\": {\n       "
  },
  {
    "path": "src-js/test/data/test-query-response.json",
    "chars": 164544,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 1\n  },\n  \"Blocks\": [\n    {\n      \"BlockType\": \"PAGE\",\n      \"Geometry\": {\n       "
  },
  {
    "path": "src-js/test/data/test-response.json",
    "chars": 152635,
    "preview": "{\n  \"$metadata\": {\n    \"httpStatusCode\": 200,\n    \"requestId\": \"ec200a34-47e6-49c2-89aa-7b1771b00ab7\",\n    \"attempts\": 1"
  },
  {
    "path": "src-js/test/data/test-twocol-header-footer-response.json",
    "chars": 518787,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 1\n  },\n  \"Blocks\": [\n    {\n      \"BlockType\": \"PAGE\",\n      \"Geometry\": {\n       "
  },
  {
    "path": "src-js/test/integ/aws-sdk.test.ts",
    "chars": 3026,
    "preview": "// Node Built-Ins:\nimport { promises as fs } from \"fs\";\n\n// External Dependencies:\nimport {\n  TextractClient,\n  AnalyzeD"
  },
  {
    "path": "src-js/test/tsconfig.json",
    "chars": 351,
    "preview": "{\n  // We can't `include` the test/ folder in the top-level tsconfig.json (or it would get built into\n  // the library) "
  },
  {
    "path": "src-js/test/unit/api-models.test.ts",
    "chars": 5311,
    "preview": "import { ApiBlockType, isLayoutBlockType } from \"../../src/api-models\";\n\ndescribe(\"isLayoutBlockType\", () => {\n  it(\"sho"
  },
  {
    "path": "src-js/test/unit/base.test.ts",
    "chars": 24553,
    "preview": "import { ApiBlockType, ApiRelationshipType } from \"../../src/api-models/base\";\nimport { ApiBlock } from \"../../src/api-m"
  },
  {
    "path": "src-js/test/unit/content.test.ts",
    "chars": 44537,
    "preview": "import { ApiBlockType, ApiRelationshipType } from \"../../src/api-models/base\";\nimport {\n  ApiLineBlock,\n  ApiSelectionEl"
  },
  {
    "path": "src-js/test/unit/corpus/header-footer.test.ts",
    "chars": 4779,
    "preview": "/**\n * Tests for evaluating the header and footer segmentation APIs against a corpus of example documents.\n *\n * You can"
  },
  {
    "path": "src-js/test/unit/corpus/reading-order.test.ts",
    "chars": 6645,
    "preview": "/**\n * Tests for evaluating the '*inReadingOrder' APIs against a corpus of example documents.\n *\n * You can use this tem"
  },
  {
    "path": "src-js/test/unit/document.test.ts",
    "chars": 21819,
    "preview": "import { ApiBlockType } from \"../../src/api-models/base\";\nimport {\n  ApiAnalyzeDocumentResponse,\n  ApiAsyncDocumentAnaly"
  },
  {
    "path": "src-js/test/unit/expense.test.ts",
    "chars": 7497,
    "preview": "import { ApiAnalyzeExpenseResponse } from \"../../src/api-models/response\";\nimport { TextractExpense } from \"../../src/ex"
  },
  {
    "path": "src-js/test/unit/form.test.ts",
    "chars": 21882,
    "preview": "import { ApiBlockType, ApiRelationshipType } from \"../../src/api-models/base\";\nimport { ApiSelectionStatus } from \"../.."
  },
  {
    "path": "src-js/test/unit/geometry.test.ts",
    "chars": 9650,
    "preview": "import { ApiBlockType } from \"../../src/api-models/base\";\nimport { ApiTextType, ApiWordBlock } from \"../../src/api-model"
  },
  {
    "path": "src-js/test/unit/id.test.ts",
    "chars": 7107,
    "preview": "import { ApiIdentityDocument } from \"../../src/api-models/id\";\nimport { ApiAnalyzeIdResponse } from \"../../src/api-model"
  },
  {
    "path": "src-js/test/unit/index.test.ts",
    "chars": 5324,
    "preview": "describe(\"Top-level index.ts\", () => {\n  it(\"should re-export expected public /api-models properties\", () => {\n    expec"
  },
  {
    "path": "src-js/test/unit/layout.test.ts",
    "chars": 33487,
    "preview": "import { ApiLayoutListBlock } from \"../../src\";\nimport { ApiBlockType, ApiRelationshipType } from \"../../src/api-models/"
  },
  {
    "path": "src-js/test/unit/query.test.ts",
    "chars": 21769,
    "preview": "import { ApiAnswerRelationship, ApiBlockType, ApiRelationshipType } from \"../../src/api-models/base\";\nimport { ApiQueryB"
  },
  {
    "path": "src-js/test/unit/table.test.ts",
    "chars": 37565,
    "preview": "import { ApiBlockType, ApiRelationshipType } from \"../../src/api-models/base\";\nimport { ApiAnalyzeDocumentResponse, ApiR"
  },
  {
    "path": "src-js/tsconfig.browser.json",
    "chars": 184,
    "preview": "{\n  \"extends\": \"./tsconfig.json\",\n  \"compilerOptions\": {\n    // @rollup/plugin-typescript requires esnext output (will b"
  },
  {
    "path": "src-js/tsconfig.cjs.json",
    "chars": 114,
    "preview": "{\n  \"extends\": \"./tsconfig.json\",\n  \"compilerOptions\": {\n    \"module\": \"CommonJS\",\n    \"outDir\": \"dist/cjs\"\n  }\n}\n"
  },
  {
    "path": "src-js/tsconfig.es.json",
    "chars": 111,
    "preview": "{\n  \"extends\": \"./tsconfig.json\",\n  \"compilerOptions\": {\n    \"module\": \"esnext\",\n    \"outDir\": \"dist/es\"\n  }\n}\n"
  },
  {
    "path": "src-js/tsconfig.json",
    "chars": 468,
    "preview": "{\n  \"compilerOptions\": {\n    \"esModuleInterop\": true,\n    \"forceConsistentCasingInFileNames\": true,\n    \"lib\": [\"dom\", \""
  },
  {
    "path": "src-js/tsconfig.types.json",
    "chars": 177,
    "preview": "{\n  \"extends\": \"./tsconfig.json\",\n  \"compilerOptions\": {\n    \"declaration\": true,\n    \"emitDeclarationOnly\": true,\n    \""
  },
  {
    "path": "src-python/.style.yapf",
    "chars": 111,
    "preview": "[style]\nbased_on_style = pep8\nspaces_before_comment = 4\nsplit_before_logical_operator = true\ncolumn_limit: 120\n"
  },
  {
    "path": "src-python/.yapfignore",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src-python/README.md",
    "chars": 10214,
    "preview": "# Textract Response Parser\n\nYou can use Textract response parser library to easily parser JSON returned by Amazon Textra"
  },
  {
    "path": "src-python/a2i/README.md",
    "chars": 3320,
    "preview": "# Textract-Augmented AI (A2I) Human Review Response Parser\n\nYou can use Textract-Amazon Augmented AI (A2I) response pars"
  },
  {
    "path": "src-python/a2i/__init__.py",
    "chars": 108,
    "preview": "# -*- coding: utf-8 -*-\n\"\"\"Top-level package for amazon-textract-response-parser.\"\"\"\n\n__version__ = '1.0.2'\n"
  },
  {
    "path": "src-python/a2i/a2i-response.json",
    "chars": 136560,
    "preview": "{\n    \"aiServiceRequest\": {\n        \"document\": {\n            \"s3Object\": {\n                \"bucket\": \"textract-document"
  },
  {
    "path": "src-python/a2i/a2irp.py",
    "chars": 7188,
    "preview": "import json\n\nclass Word:\n    def __init__(self, block, blockMap):\n        self._block = block\n        self._id = block['"
  },
  {
    "path": "src-python/a2i/a2irptest.py",
    "chars": 1355,
    "preview": "import json\nfrom a2irp import Document\n\n\ndef processDocument(doc):\n    for page in doc.pages:\n        print(\"PAGE\\n====="
  },
  {
    "path": "src-python/bin/amazon-textract-pipeline",
    "chars": 1696,
    "preview": "#!/usr/bin/env python\n\nimport json\nimport sys\nfrom trp.trp2 import TDocumentSchema\nfrom trp.t_pipeline import order_bloc"
  },
  {
    "path": "src-python/extras/dev.txt",
    "chars": 7,
    "preview": "pytest\n"
  },
  {
    "path": "src-python/setup.cfg",
    "chars": 466,
    "preview": "[bumpversion]\ncurrent_version = 1.0.2\ncommit = True\ntag = True\n\n[bumpversion:file:setup.py]\nsearch = version='{current_v"
  },
  {
    "path": "src-python/setup.py",
    "chars": 2021,
    "preview": "import os\nimport sys\nfrom setuptools import setup\n\n\ndef read(fname):\n    return open(os.path.join(os.path.dirname(__file"
  },
  {
    "path": "src-python/tests/data/180-degree-roation.json",
    "chars": 487699,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 1\n  },\n  \"JobStatus\": \"SUCCEEDED\",\n  \"Blocks\": [\n    {\n      \"BlockType\": \"PAGE\","
  },
  {
    "path": "src-python/tests/data/2023-Q2-table-model-sample.json",
    "chars": 274499,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 1\n    },\n    \"Blocks\": [\n        {\n            \"BlockType\": \"PAGE\",\n       "
  },
  {
    "path": "src-python/tests/data/all_features_with_floating_title_header.json",
    "chars": 997174,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 1\n    },\n    \"Blocks\": [\n        {\n            \"BlockType\": \"PAGE\",\n       "
  },
  {
    "path": "src-python/tests/data/analyzeExpenseResponse-multipage.json",
    "chars": 455749,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 2\n  },\n  \"JobStatus\": \"SUCCEEDED\",\n  \"ExpenseDocuments\": [\n    {\n      \"ExpenseIn"
  },
  {
    "path": "src-python/tests/data/bounding_box_issue.json",
    "chars": 340405,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 1\n    },\n    \"Blocks\": [\n        {\n            \"BlockType\": \"PAGE\",\n       "
  },
  {
    "path": "src-python/tests/data/employment-application.json",
    "chars": 158398,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 1\n    },\n    \"Blocks\": [\n        {\n            \"BlockType\": \"PAGE\",\n       "
  },
  {
    "path": "src-python/tests/data/gib.json",
    "chars": 359047,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 1\n    },\n    \"JobStatus\": \"SUCCEEDED\",\n    \"NextToken\": \"OJQgPDJLZcJpwsujYY"
  },
  {
    "path": "src-python/tests/data/gib1.json",
    "chars": 263699,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 1\n  },\n  \"Blocks\": [\n    {\n      \"BlockType\": \"PAGE\",\n      \"Geometry\": {\n       "
  },
  {
    "path": "src-python/tests/data/gib_10_degrees.json",
    "chars": 160332,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 1}, \"Blocks\": [{\"BlockType\": \"PAGE\", \"Geometry\": {\"BoundingBox\": {\"Width\": 1.0, \"Height\":"
  },
  {
    "path": "src-python/tests/data/gib__10_degrees.json",
    "chars": 160332,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 1}, \"Blocks\": [{\"BlockType\": \"PAGE\", \"Geometry\": {\"BoundingBox\": {\"Width\": 1.0, \"Height\":"
  },
  {
    "path": "src-python/tests/data/gib__15_degrees.json",
    "chars": 155889,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 1}, \"Blocks\": [{\"BlockType\": \"PAGE\", \"Geometry\": {\"BoundingBox\": {\"Width\": 1.0, \"Height\":"
  },
  {
    "path": "src-python/tests/data/gib__180_degrees.json",
    "chars": 165128,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 1}, \"Blocks\": [{\"BlockType\": \"PAGE\", \"Geometry\": {\"BoundingBox\": {\"Width\": 1.0, \"Height\":"
  },
  {
    "path": "src-python/tests/data/gib__25_degrees.json",
    "chars": 144206,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 1}, \"Blocks\": [{\"BlockType\": \"PAGE\", \"Geometry\": {\"BoundingBox\": {\"Width\": 1.0, \"Height\":"
  },
  {
    "path": "src-python/tests/data/gib__270_degrees.json",
    "chars": 162677,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 1}, \"Blocks\": [{\"BlockType\": \"PAGE\", \"Geometry\": {\"BoundingBox\": {\"Width\": 1.0, \"Height\":"
  },
  {
    "path": "src-python/tests/data/gib__90_degrees.json",
    "chars": 157929,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 1}, \"Blocks\": [{\"BlockType\": \"PAGE\", \"Geometry\": {\"BoundingBox\": {\"Width\": 1.0, \"Height\":"
  },
  {
    "path": "src-python/tests/data/gib__minus_10_degrees.json",
    "chars": 164235,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 1}, \"Blocks\": [{\"BlockType\": \"PAGE\", \"Geometry\": {\"BoundingBox\": {\"Width\": 1.0, \"Height\":"
  },
  {
    "path": "src-python/tests/data/gib_multi_page_table_merge.json",
    "chars": 602412,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 3}, \"JobStatus\": \"SUCCEEDED\", \"Blocks\": [{\"BlockType\": \"PAGE\", \"Geometry\": {\"BoundingBox\""
  },
  {
    "path": "src-python/tests/data/gib_multi_page_tables.json",
    "chars": 257271,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 2}, \"JobStatus\": \"SUCCEEDED\", \"Blocks\": [{\"BlockType\": \"PAGE\", \"Geometry\": {\"BoundingBox\""
  },
  {
    "path": "src-python/tests/data/gib_multi_tables_multi_page_sample.json",
    "chars": 90704,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 3}, \"JobStatus\": \"SUCCEEDED\", \"Blocks\": [{\"BlockType\": \"PAGE\", \"Geometry\": {\"BoundingBox\""
  },
  {
    "path": "src-python/tests/data/in-table-footer.json",
    "chars": 805976,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 1\n    },\n    \"Blocks\": [\n        {\n            \"BlockType\": \"PAGE\",\n       "
  },
  {
    "path": "src-python/tests/data/in-table-title.json",
    "chars": 244517,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 1\n    },\n    \"Blocks\": [\n        {\n            \"BlockType\": \"PAGE\",\n       "
  },
  {
    "path": "src-python/tests/data/issue_83.json",
    "chars": 52004,
    "preview": "{\n    \"AnalyzeDocumentModelVersion\": \"1.0\",\n    \"Blocks\": [{\n        \"BlockType\": \"PAGE\",\n        \"ColumnIndex\": null,\n "
  },
  {
    "path": "src-python/tests/data/lending-doc-output.json",
    "chars": 1653784,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 32\n    },\n    \"JobStatus\": \"SUCCEEDED\",\n    \"Results\": [{\n        \"Page\": 1"
  },
  {
    "path": "src-python/tests/data/lending-package-no-signature.json",
    "chars": 45693,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 7}, \"JobStatus\": \"SUCCEEDED\", \"Results\": [{\"Page\": 1, \"PageClassification\": {\"PageType\": "
  },
  {
    "path": "src-python/tests/data/little_women_page_1.json",
    "chars": 156031,
    "preview": "{\n  \"Bucket\": \"textract-console-us-east-1-d17637b3-eef6-42df-88c7-27374be8f2f9\",\n  \"UploadedFileName\": \"85a1f161_67e6_4a"
  },
  {
    "path": "src-python/tests/data/multi-page-forms-samples-2-page.json",
    "chars": 256534,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 2}, \"JobStatus\": \"SUCCEEDED\", \"Blocks\": [{\"BlockType\": \"PAGE\", \"Geometry\": {\"BoundingBox\""
  },
  {
    "path": "src-python/tests/data/multi-tables-multi-page-sample.json",
    "chars": 90704,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 3}, \"JobStatus\": \"SUCCEEDED\", \"Blocks\": [{\"BlockType\": \"PAGE\", \"Geometry\": {\"BoundingBox\""
  },
  {
    "path": "src-python/tests/data/patient_intake_form_sample.json",
    "chars": 336672,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 1\n    },\n    \"Blocks\": [\n        {\n            \"BlockType\": \"PAGE\",\n       "
  },
  {
    "path": "src-python/tests/data/paystub_with_signature.json",
    "chars": 631413,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 1\n  },\n  \"Blocks\": [\n    {\n      \"BlockType\": \"PAGE\",\n      \"Geometry\": {\n       "
  },
  {
    "path": "src-python/tests/data/queries_sample.json",
    "chars": 411216,
    "preview": "{\n    \"AnalyzeDocumentModelVersion\": \"1.0\",\n    \"Blocks\": [\n        {\n            \"BlockType\": \"PAGE\",\n            \"Geom"
  },
  {
    "path": "src-python/tests/data/request_for_verification_of_employment.json",
    "chars": 902193,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 1\n  },\n  \"Blocks\": [\n    {\n      \"BlockType\": \"PAGE\",\n      \"Geometry\": {\n       "
  },
  {
    "path": "src-python/tests/data/tables_with_headers_and_merged_cells.json",
    "chars": 186485,
    "preview": "    {\n        \"DocumentMetadata\": {\n            \"Pages\": 1\n        },\n        \"Blocks\": [\n            {\n                "
  },
  {
    "path": "src-python/tests/data/tables_with_headers_out_of_order_cells.json",
    "chars": 471384,
    "preview": "{\"DocumentMetadata\": {\"Pages\": 1}, \"Blocks\": [{\"BlockType\": \"PAGE\", \"Geometry\": {\"BoundingBox\": {\"Width\": 0.999764263629"
  },
  {
    "path": "src-python/tests/data/tables_with_merged_cells_sample1.json",
    "chars": 276147,
    "preview": "{\r\n    \"DocumentMetadata\": {\r\n        \"Pages\": 1\r\n    },\r\n    \"Blocks\": [\r\n        {\r\n            \"BlockType\": \"PAGE\",\r\n"
  },
  {
    "path": "src-python/tests/data/tables_with_merged_cells_sample2.json",
    "chars": 287678,
    "preview": "{\r\n    \"DocumentMetadata\": {\r\n        \"Pages\": 1\r\n    },\r\n    \"Blocks\": [\r\n        {\r\n            \"BlockType\": \"PAGE\",\r\n"
  },
  {
    "path": "src-python/tests/data/test-trp2-analyzeid_sample_multi_page.json",
    "chars": 5022,
    "preview": "{\"IdentityDocuments\": [{\"DocumentIndex\": 1, \"IdentityDocumentFields\": [{\"Type\": {\"Text\": \"FIRST_NAME\"}, \"ValueDetection\""
  },
  {
    "path": "src-python/tests/data/test-trp2_analyzeid_sample1.json",
    "chars": 6523,
    "preview": "{\n    \"IdentityDocuments\": [\n        {\n            \"DocumentIndex\": 1,\n            \"IdentityDocumentFields\": [\n         "
  },
  {
    "path": "src-python/tests/data/test-trp2_analyzeid_sample1_with_OCR.json",
    "chars": 138932,
    "preview": "{\n    \"IdentityDocuments\": [\n        {\n            \"DocumentIndex\": 1,\n            \"IdentityDocumentFields\": [\n         "
  },
  {
    "path": "src-python/tests/data/test-trp2_analyzeid_sample2.json",
    "chars": 90,
    "preview": "{\n    \"DocumentMetadata\": {\n      \"Pages\": 1\n    },\n    \"AnalyzeIDModelVersion\": \"1.0\"\n  }"
  },
  {
    "path": "src-python/tests/data/test_table_merged_text.json",
    "chars": 55803,
    "preview": "{\n  \"DocumentMetadata\": {\n    \"Pages\": 1\n  },\n  \"Blocks\": [\n    {\n      \"BlockType\": \"PAGE\",\n      \"Geometry\": {\n       "
  },
  {
    "path": "src-python/tests/data/test_trp2_expense_sample1.json",
    "chars": 160324,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 1\n    },\n    \"ExpenseDocuments\": [\n        {\n            \"ExpenseIndex\": 1,"
  },
  {
    "path": "src-python/tests/data/test_trp2_expense_sample2.json",
    "chars": 57770,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 1\n    },\n    \"ExpenseDocuments\": [\n        {\n            \"ExpenseIndex\": 1,"
  },
  {
    "path": "src-python/tests/data/test_trp2_expense_sample3.json",
    "chars": 95330,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 1\n    },\n    \"ExpenseDocuments\": [\n        {\n            \"ExpenseIndex\": 1,"
  },
  {
    "path": "src-python/tests/data/test_trp2_expense_sample4.json",
    "chars": 137121,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 1\n    },\n    \"ExpenseDocuments\": [\n        {\n            \"ExpenseIndex\": 1,"
  },
  {
    "path": "src-python/tests/data/textract-new-tables-api.json",
    "chars": 186483,
    "preview": "    {\n        \"DocumentMetadata\": {\n            \"Pages\": 1\n        },\n        \"Blocks\": [\n            {\n                "
  },
  {
    "path": "src-python/tests/test-response.json",
    "chars": 142156,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 1\n    },\n    \"Blocks\": [\n        {\n            \"BlockType\": \"PAGE\",\n       "
  },
  {
    "path": "src-python/tests/test_base_trp2.py",
    "chars": 1970,
    "preview": "import os\nfrom trp.t_pipeline import add_page_orientation, order_blocks_by_geo\nfrom typing import List\nfrom trp.t_pipeli"
  },
  {
    "path": "src-python/tests/test_merged.py",
    "chars": 842,
    "preview": "import json\nimport pytest\nimport os\nfrom trp import Document\n\ncurrent_folder = os.path.dirname(os.path.realpath(__file__"
  },
  {
    "path": "src-python/tests/test_t_tables.py",
    "chars": 3117,
    "preview": "# Python Built-Ins:\nimport json\nimport os\n\n# Local Dependencies:\nfrom trp import Document\nfrom trp.t_tables import __com"
  },
  {
    "path": "src-python/tests/test_trp.py",
    "chars": 6338,
    "preview": "import json\nimport pytest\nimport os\nfrom trp import Document, Cell\nfrom typing import List\nimport logging\n\ncurrent_folde"
  },
  {
    "path": "src-python/tests/test_trp2.py",
    "chars": 50705,
    "preview": "from typing import List\nfrom trp.t_pipeline import add_page_orientation, order_blocks_by_geo, order_blocks_by_geo_x_y, p"
  },
  {
    "path": "src-python/tests/test_trp2_analyzeid.py",
    "chars": 2668,
    "preview": "import trp.trp2_analyzeid as texa\nimport json\nimport os\nimport pytest\nimport logging\n\ncurrent_folder = os.path.dirname(o"
  },
  {
    "path": "src-python/tests/test_trp2_expense.py",
    "chars": 1925,
    "preview": "import trp.trp2_expense as texp\nimport json\nimport os\nimport pytest\nimport logging\n\ncurrent_folder = os.path.dirname(os."
  },
  {
    "path": "src-python/tests/test_trp2_lending.py",
    "chars": 786,
    "preview": "import json\nimport os\nimport logging\nimport trp.trp2_lending as tl\n\ncurrent_folder = os.path.dirname(os.path.realpath(__"
  },
  {
    "path": "src-python/textract-mapping/README.md",
    "chars": 22669,
    "preview": "# Textract Response Parser to then leverage Comprehend CDK constructs \n\nYou can use Textract-Amazon Augmented AI (A2I) r"
  },
  {
    "path": "src-python/textract-mapping/__init__.py",
    "chars": 109,
    "preview": "# -*- coding: utf-8 -*-\n\"\"\"Top-level package for amazon-textract-response-parser.\"\"\"\n\n__version__ = '0.1.44'\n"
  },
  {
    "path": "src-python/textract-mapping/loan-app-response.json",
    "chars": 1507961,
    "preview": "{\n    \"AnalyzeDocumentModelVersion\": \"1.0\",\n    \"Blocks\": [\n        {\n            \"BlockType\": \"PAGE\",\n            \"Geom"
  },
  {
    "path": "src-python/textract-mapping/mapping-response.json",
    "chars": 142156,
    "preview": "{\n    \"DocumentMetadata\": {\n        \"Pages\": 1\n    },\n    \"Blocks\": [\n        {\n            \"BlockType\": \"PAGE\",\n       "
  },
  {
    "path": "src-python/textract-mapping/mapping.py",
    "chars": 464,
    "preview": "def Document(response_pages):\n    if not isinstance(response_pages, list):\n        rps = [response_pages]\n        respon"
  },
  {
    "path": "src-python/textract-mapping/mappingtest.py",
    "chars": 12439,
    "preview": "import json\nfrom mapping import Document\n\n\ndef run():\n    file_path = \"loan-app-response.json\"\n    with open(file_path, "
  },
  {
    "path": "src-python/trp/__init__.py",
    "chars": 20232,
    "preview": "# -*- coding: utf-8 -*-\n\"\"\"Top-level package for amazon-textract-response-parser.\"\"\"\nimport logging\nfrom typing import L"
  },
  {
    "path": "src-python/trp/t_pipeline.py",
    "chars": 9171,
    "preview": "import logging\nfrom trp.t_tables import ExecuteTableValidations, MergeOptions, HeaderFooterType\nimport trp.trp2 as t2\nfr"
  },
  {
    "path": "src-python/trp/t_tables.py",
    "chars": 4725,
    "preview": "import logging\nimport trp.trp2 as t2\nfrom typing import List\nfrom enum import Enum, auto\nfrom trp.trp2 import TDocument,"
  },
  {
    "path": "src-python/trp/trp2.py",
    "chars": 38350,
    "preview": "from __future__ import annotations\nfrom functools import lru_cache\nimport typing\nfrom typing import List, Set, Dict, Opt"
  },
  {
    "path": "src-python/trp/trp2_analyzeid.py",
    "chars": 7635,
    "preview": "\"\"\"\nAuthor: lanaz@amazon.com\n(De)Serializer for Textract AnalyzeID Response JSON\n\"\"\"\nfrom typing import List\nimport mars"
  },
  {
    "path": "src-python/trp/trp2_expense.py",
    "chars": 14735,
    "preview": "\"\"\"\nAuthor: dhawalkp@amazon.com\n(De)Serializer for Textract AnalyzeExpense Response JSON\n\nAnalyzeExpense’s JSON contains"
  },
  {
    "path": "src-python/trp/trp2_lending.py",
    "chars": 8106,
    "preview": "from dataclasses import dataclass, field\nfrom typing import List\nfrom uuid import uuid4, UUID\n\nfrom trp.trp2 import TGeo"
  }
]

// ... and 1 more files (download for full content)

About this extraction

This page contains the full source code of the aws-samples/amazon-textract-response-parser GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 178 files (53.3 MB), approximately 4.9M tokens, and a symbol index with 1173 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!