Showing preview only (1,138K chars total). Download the full file or copy to clipboard to get everything.
Repository: GoogleCloudPlatform/Open_Data_QnA
Branch: main
Commit: 19960bb38ba2
Files: 253
Total size: 1.0 MB
Directory structure:
gitextract_yfzkgoxq/
├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── OWNERS
├── README.md
├── SECURITY.md
├── agents/
│ ├── BuildSQLAgent.py
│ ├── DebugSQLAgent.py
│ ├── DescriptionAgent.py
│ ├── EmbedderAgent.py
│ ├── ResponseAgent.py
│ ├── ValidateSQLAgent.py
│ ├── VisualizeAgent.py
│ ├── __init__.py
│ └── core.py
├── app.py
├── backend-apis/
│ ├── README.md
│ ├── __init__.py
│ ├── main.py
│ └── policy.yaml
├── config.ini
├── dbconnectors/
│ ├── BQConnector.py
│ ├── FirestoreConnector.py
│ ├── PgConnector.py
│ ├── __init__.py
│ └── core.py
├── docs/
│ ├── README.md
│ ├── architecture.md
│ ├── best_practices.md
│ ├── changelog.md
│ ├── config_guide.md
│ ├── faq.md
│ └── repo_structure.md
├── embeddings/
│ ├── __init__.py
│ ├── kgq_embeddings.py
│ ├── retrieve_embeddings.py
│ └── store_embeddings.py
├── env_setup.py
├── frontend/
│ ├── .gitignore
│ ├── README.md
│ ├── angular.json
│ ├── database.indexes.json
│ ├── database.rules.json
│ ├── firebase_setup.json
│ ├── frontend-flutter/
│ │ ├── .flutter-plugins
│ │ ├── .flutter-plugins-dependencies
│ │ ├── Open Data QnA - Working Sheet V2 - sample_questions_UI copy.csv
│ │ ├── Open_Data_QnA_sample_questions_v3 copy.csv
│ │ ├── README.md
│ │ ├── analysis_options.yaml
│ │ ├── android/
│ │ │ ├── .gitignore
│ │ │ ├── app/
│ │ │ │ ├── build.gradle
│ │ │ │ ├── google-services.json
│ │ │ │ └── src/
│ │ │ │ ├── debug/
│ │ │ │ │ └── AndroidManifest.xml
│ │ │ │ ├── main/
│ │ │ │ │ ├── AndroidManifest.xml
│ │ │ │ │ ├── kotlin/
│ │ │ │ │ │ └── com/
│ │ │ │ │ │ └── pilotcap/
│ │ │ │ │ │ └── ttmd/
│ │ │ │ │ │ └── MainActivity.kt
│ │ │ │ │ └── res/
│ │ │ │ │ ├── drawable/
│ │ │ │ │ │ └── launch_background.xml
│ │ │ │ │ ├── drawable-v21/
│ │ │ │ │ │ └── launch_background.xml
│ │ │ │ │ ├── values/
│ │ │ │ │ │ └── styles.xml
│ │ │ │ │ └── values-night/
│ │ │ │ │ └── styles.xml
│ │ │ │ └── profile/
│ │ │ │ └── AndroidManifest.xml
│ │ │ ├── build.gradle
│ │ │ ├── gradle/
│ │ │ │ └── wrapper/
│ │ │ │ └── gradle-wrapper.properties
│ │ │ ├── gradle.properties
│ │ │ ├── nl2sql_oss_android.iml
│ │ │ └── settings.gradle
│ │ ├── build/
│ │ │ └── web/
│ │ │ └── .last_build_id
│ │ ├── ios/
│ │ │ ├── .gitignore
│ │ │ ├── Flutter/
│ │ │ │ ├── AppFrameworkInfo.plist
│ │ │ │ ├── Debug.xcconfig
│ │ │ │ └── Release.xcconfig
│ │ │ ├── Podfile
│ │ │ ├── Runner/
│ │ │ │ ├── AppDelegate.swift
│ │ │ │ ├── Assets.xcassets/
│ │ │ │ │ ├── AppIcon.appiconset/
│ │ │ │ │ │ └── Contents.json
│ │ │ │ │ └── LaunchImage.imageset/
│ │ │ │ │ ├── Contents.json
│ │ │ │ │ └── README.md
│ │ │ │ ├── Base.lproj/
│ │ │ │ │ ├── LaunchScreen.storyboard
│ │ │ │ │ └── Main.storyboard
│ │ │ │ ├── GoogleService-Info.plist
│ │ │ │ ├── Info.plist
│ │ │ │ └── Runner-Bridging-Header.h
│ │ │ ├── Runner.xcodeproj/
│ │ │ │ ├── project.pbxproj
│ │ │ │ ├── project.xcworkspace/
│ │ │ │ │ ├── contents.xcworkspacedata
│ │ │ │ │ └── xcshareddata/
│ │ │ │ │ ├── IDEWorkspaceChecks.plist
│ │ │ │ │ └── WorkspaceSettings.xcsettings
│ │ │ │ └── xcshareddata/
│ │ │ │ └── xcschemes/
│ │ │ │ └── Runner.xcscheme
│ │ │ ├── Runner.xcworkspace/
│ │ │ │ ├── contents.xcworkspacedata
│ │ │ │ └── xcshareddata/
│ │ │ │ ├── IDEWorkspaceChecks.plist
│ │ │ │ └── WorkspaceSettings.xcsettings
│ │ │ └── RunnerTests/
│ │ │ └── RunnerTests.swift
│ │ ├── lib/
│ │ │ ├── firebase_options.dart
│ │ │ ├── main.dart
│ │ │ ├── screens/
│ │ │ │ ├── bot.dart
│ │ │ │ ├── bot_chat_view.dart
│ │ │ │ ├── disclaimer.dart
│ │ │ │ └── settings.dart
│ │ │ ├── services/
│ │ │ │ ├── display_stepper/
│ │ │ │ │ ├── display_stepper_cubit.dart
│ │ │ │ │ └── display_stepper_state.dart
│ │ │ │ ├── first_question/
│ │ │ │ │ ├── first_question_cubit.dart
│ │ │ │ │ └── first_question_state.dart
│ │ │ │ ├── load_question/
│ │ │ │ │ ├── load_question_cubit.dart
│ │ │ │ │ └── load_question_state.dart
│ │ │ │ ├── new_suggestions/
│ │ │ │ │ ├── new_suggestion_cubit.dart
│ │ │ │ │ └── new_suggestion_state.dart
│ │ │ │ ├── text_to_doc_question/
│ │ │ │ │ ├── text_to_doc_question_cubit.dart
│ │ │ │ │ └── text_to_doc_question_state.dart
│ │ │ │ ├── update_expert_mode/
│ │ │ │ │ ├── update_expert_mode_cubit.dart
│ │ │ │ │ └── update_expert_mode_state.dart
│ │ │ │ ├── update_popular_questions/
│ │ │ │ │ ├── update_popular_questions_cubit.dart
│ │ │ │ │ └── update_popular_questions_state.dart
│ │ │ │ └── update_stepper/
│ │ │ │ ├── update_stepper_cubit.dart
│ │ │ │ └── update_stepper_state.dart
│ │ │ └── utils/
│ │ │ ├── Input_custom.dart
│ │ │ ├── TextToDocParameter.dart
│ │ │ ├── custom_input_field.dart
│ │ │ ├── most_popular_questions.dart
│ │ │ ├── pdf_viewer.dart
│ │ │ ├── stepper_expert_info.dart
│ │ │ └── tabbed_container.dart
│ │ ├── nl2sql_oss.iml
│ │ ├── pubspec.yaml
│ │ ├── test/
│ │ │ └── widget_test.dart
│ │ └── web/
│ │ ├── index 01.49.28.html
│ │ ├── index.html
│ │ └── manifest.json
│ ├── frontend.yaml
│ ├── package.json
│ ├── server.ts
│ ├── src/
│ │ ├── app/
│ │ │ ├── agent-chat/
│ │ │ │ ├── agent-chat.component.html
│ │ │ │ ├── agent-chat.component.scss
│ │ │ │ ├── agent-chat.component.spec.ts
│ │ │ │ └── agent-chat.component.ts
│ │ │ ├── app-routing.module.ts
│ │ │ ├── app.component.html
│ │ │ ├── app.component.scss
│ │ │ ├── app.component.spec.ts
│ │ │ ├── app.component.ts
│ │ │ ├── app.module.server.ts
│ │ │ ├── app.module.ts
│ │ │ ├── business-user/
│ │ │ │ ├── business-user.component.html
│ │ │ │ ├── business-user.component.scss
│ │ │ │ ├── business-user.component.spec.ts
│ │ │ │ └── business-user.component.ts
│ │ │ ├── grouping-modal/
│ │ │ │ ├── grouping-modal.component.html
│ │ │ │ ├── grouping-modal.component.scss
│ │ │ │ ├── grouping-modal.component.spec.ts
│ │ │ │ └── grouping-modal.component.ts
│ │ │ ├── header/
│ │ │ │ ├── header.component.html
│ │ │ │ ├── header.component.scss
│ │ │ │ ├── header.component.spec.ts
│ │ │ │ └── header.component.ts
│ │ │ ├── home/
│ │ │ │ ├── home.component.html
│ │ │ │ ├── home.component.scss
│ │ │ │ ├── home.component.spec.ts
│ │ │ │ └── home.component.ts
│ │ │ ├── http.interceptor.ts
│ │ │ ├── login/
│ │ │ │ ├── login.component.html
│ │ │ │ ├── login.component.scss
│ │ │ │ ├── login.component.spec.ts
│ │ │ │ └── login.component.ts
│ │ │ ├── login-button/
│ │ │ │ ├── login-button.component.html
│ │ │ │ ├── login-button.component.scss
│ │ │ │ ├── login-button.component.spec.ts
│ │ │ │ └── login-button.component.ts
│ │ │ ├── menu/
│ │ │ │ ├── menu.component.html
│ │ │ │ ├── menu.component.scss
│ │ │ │ ├── menu.component.spec.ts
│ │ │ │ └── menu.component.ts
│ │ │ ├── prism/
│ │ │ │ ├── prism.component.html
│ │ │ │ ├── prism.component.scss
│ │ │ │ ├── prism.component.spec.ts
│ │ │ │ ├── prism.component.ts
│ │ │ │ └── prism.d.ts
│ │ │ ├── scenario-list/
│ │ │ │ ├── scenario-list.component.html
│ │ │ │ ├── scenario-list.component.scss
│ │ │ │ ├── scenario-list.component.spec.ts
│ │ │ │ └── scenario-list.component.ts
│ │ │ ├── shared/
│ │ │ │ └── services/
│ │ │ │ ├── chat.service.spec.ts
│ │ │ │ ├── chat.service.ts
│ │ │ │ ├── home.service.spec.ts
│ │ │ │ ├── home.service.ts
│ │ │ │ ├── login.service.spec.ts
│ │ │ │ ├── login.service.ts
│ │ │ │ ├── shared.service.spec.ts
│ │ │ │ └── shared.service.ts
│ │ │ ├── upload-template/
│ │ │ │ ├── upload-template.component.html
│ │ │ │ ├── upload-template.component.scss
│ │ │ │ ├── upload-template.component.spec.ts
│ │ │ │ └── upload-template.component.ts
│ │ │ ├── user-journey/
│ │ │ │ ├── user-journey.component.html
│ │ │ │ ├── user-journey.component.scss
│ │ │ │ ├── user-journey.component.spec.ts
│ │ │ │ └── user-journey.component.ts
│ │ │ └── user-photo/
│ │ │ ├── user-photo.component.html
│ │ │ ├── user-photo.component.scss
│ │ │ ├── user-photo.component.spec.ts
│ │ │ └── user-photo.component.ts
│ │ ├── assets/
│ │ │ ├── .gitkeep
│ │ │ └── constants.ts
│ │ ├── index.html
│ │ ├── main.server.ts
│ │ ├── main.ts
│ │ ├── styles/
│ │ │ └── variables.scss
│ │ └── styles.scss
│ ├── tsconfig.app.json
│ ├── tsconfig.json
│ └── tsconfig.spec.json
├── notebooks/
│ ├── 0_CopyDataToBigQuery.ipynb
│ ├── 0_CopyDataToCloudSqlPG.ipynb
│ ├── 1_Setup_OpenDataQnA.ipynb
│ ├── 2_Run_OpenDataQnA.ipynb
│ └── 3_LoadKnownGoodSQL.ipynb
├── opendataqna.py
├── prompts.yaml
├── pyproject.toml
├── scripts/
│ ├── .~lock.Scenarios Sample.csv#
│ ├── Scenarios Sample.csv
│ ├── __init__.py
│ ├── copy_select_table_column_bigquery.py
│ ├── data_source_list.csv
│ ├── data_source_list_sample.csv
│ ├── known_good_sql.csv
│ ├── save_config.py
│ └── tables_columns_descriptions.csv
├── terraform/
│ ├── .gitignore
│ ├── README.md
│ ├── backend.tf
│ ├── bq.tf
│ ├── embeddings-setup.tf
│ ├── frontend.tf
│ ├── iam.tf
│ ├── locals.tf
│ ├── main.tf
│ ├── outputs.tf
│ ├── pg-vector.tf
│ ├── scripts/
│ │ ├── backend-deployment.sh
│ │ ├── copy-firebase-json.sh
│ │ ├── create-and-store-embeddings.py
│ │ ├── deploy-all.sh
│ │ ├── execute-gcloud-cmd.sh
│ │ ├── execute-python-files.sh
│ │ ├── frontend-deployment.sh
│ │ └── install-dependencies.sh
│ ├── templates/
│ │ ├── config.ini.tftpl
│ │ └── constants.ts.tftpl
│ ├── terraform.tfvars.sample
│ ├── variables.tf
│ └── versions.tf
└── utilities/
├── __init__.py
└── imgs/
└── aa
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
.venv/
__pycache__/
agents/__pycache__/
application_default_credentials.json
databases/__pycache__/
embeddings/__pycache__/
utils/__pycache__/
*/__pycache__/
.DS_Store
poetry.lock
dist/
test-pypi-token.txt
firebase.json
.firebaserc
config_copy.ini
eval/
================================================
FILE: CODE_OF_CONDUCT.md
================================================
<!-- # Generated by synthtool. DO NOT EDIT! !-->
# Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, gender identity and expression, level of
experience, education, socio-economic status, nationality, personal appearance,
race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, or to ban temporarily or permanently any
contributor for other behaviors that they deem inappropriate, threatening,
offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.
This Code of Conduct also applies outside the project spaces when the Project
Steward has a reasonable belief that an individual's behavior may have a
negative impact on the project or its community.
## Conflict Resolution
We do not believe that all conflict is bad; healthy debate and disagreement
often yield positive results. However, it is never okay to be disrespectful or
to engage in behavior that violates the project’s code of conduct.
If you see someone violating the code of conduct, you are encouraged to address
the behavior directly with those involved. Many issues can be resolved quickly
and easily, and this gives people more control over the outcome of their
dispute. If you are unable to resolve the matter for any reason, or if the
behavior is threatening or harassing, report it. We are dedicated to providing
an environment where participants feel welcome and safe.
Reports should be directed to *googleapis-stewards@google.com*, the
Project Steward(s) for *Google Cloud Client Libraries*. It is the Project Steward’s duty to
receive and address reported violations of the code of conduct. They will then
work with a committee consisting of representatives from the Open Source
Programs Office and the Google Open Source Strategy team. If for any reason you
are uncomfortable reaching out to the Project Steward, please email
opensource@google.com.
We will investigate every complaint, but you may not receive a direct response.
We will use our discretion in determining when and how to follow up on reported
incidents, which may range from not taking action to permanent expulsion from
the project and project-sponsored spaces. We will notify the accused of the
report and provide them an opportunity to discuss it before any action is taken.
The identity of the reporter will be omitted from the details of the report
supplied to the accused. In potentially harmful situations, such as ongoing
harassment or threats to anyone's safety, we may take action without notice.
## Attribution
This Code of Conduct is adapted from the Contributor Covenant, version 1.4,
available at
https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
================================================
FILE: CONTRIBUTING.md
================================================
# How to contribute
We'd love to accept your patches and contributions to this project.
## Before you begin
### Sign our Contributor License Agreement
Contributions to this project must be accompanied by a
[Contributor License Agreement](https://cla.developers.google.com/about) (CLA).
You (or your employer) retain the copyright to your contribution; this simply
gives us permission to use and redistribute your contributions as part of the
project.
If you or your current employer have already signed the Google CLA (even if it
was for a different project), you probably don't need to do it again.
Visit <https://cla.developers.google.com/> to see your current agreements or to
sign a new one.
### Review our community guidelines
This project follows
[Google's Open Source Community Guidelines](https://opensource.google/conduct/).
## Contribution process
### Code reviews
All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.
================================================
FILE: Dockerfile
================================================
# Use the official lightweight Python image.
# https://hub.docker.com/_/python
# NOTE: the repository README lists Python >= 3.10 as a prerequisite, so the
# base image must be at least 3.10 (was 3.9, which contradicted the docs).
FROM python:3.10-slim

# Allow statements and log messages to immediately appear in the Knative logs.
# (ENV KEY=value is the recommended form; the legacy space-separated form is deprecated.)
ENV PYTHONUNBUFFERED=True

# Copy local code to the container image.
ENV APP_HOME=/app
WORKDIR $APP_HOME
COPY . .

# Install production dependencies.
# --no-cache-dir avoids persisting pip's download cache in the image layer.
RUN pip install --no-cache-dir poetry
RUN poetry install --no-interaction --no-ansi

# Run the web service on container startup. Here we use the gunicorn
# webserver, with one worker process and 8 threads.
# For environments with multiple CPU cores, increase the number of workers
# to be equal to the cores available.
# Timeout is set to 0 to disable the timeouts of the workers to allow Cloud Run to handle instance scaling.
# CMD exec gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app
CMD HOME=/root poetry run gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 backend-apis.main:app
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: MANIFEST.in
================================================
# -*- coding: utf-8 -*-
#
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Generated by synthtool. DO NOT EDIT!
include README.rst LICENSE
recursive-include third_party *
recursive-include bigframes *.json *.proto py.typed
recursive-include tests *
global-exclude *.py[co]
global-exclude __pycache__
# Exclude scripts for samples readmegen
prune scripts/readme-gen
================================================
FILE: OWNERS
================================================
msubasioglu@google.com
steveswalker@google.com
kpatlolla@google.com
srilakshmil@google.com
mokshazna@google.com
================================================
FILE: README.md
================================================
<p align="center">
<a href="utilities/imgs/aaie.png">
<img src="utilities/imgs/aaie.png" alt="aaie image" width="auto" height="150">
</a>
</p>
<p align="center">
<a href="https://sites.google.com/corp/google.com/genai-solutions/home?authuser=0">
<img src="utilities/imgs/opendataqna_logo.png" alt="logo" width="400" height="auto">
</a>
</p>
<h1 align="center">Open Data QnA - Chat with your SQL Database</h1>
_______________
<div align="center">
<strong>🚨 Version 2.0.0 is now live! Refer to the <a href="docs/changelog.md">Release Notes</a> for detailed information on updates and fixes. 🚨</strong>
</div>
_______________
✨ Overview
-------------
The **Open Data QnA** python library enables you to chat with your databases by leveraging LLM Agents on Google Cloud.
Open Data QnA enables a conversational approach to interacting with your data. Ask questions about your PostgreSQL or BigQuery databases in natural language and receive informative responses, without needing to write SQL. Open Data QnA leverages Large Language Models (LLMs) to bridge the gap between human language and database queries, streamlining data analysis and decision-making.

**Key Features:**
* **Conversational Querying with Multiturn Support:** Ask questions naturally, without requiring SQL knowledge and ask follow up questions.
* **Table Grouping:** Group tables under one use case/user grouping name, which helps filter a large number of tables so LLMs can better understand them.
* **Multi Schema/Dataset Support:** You can group tables from different schemas/datasets for embedding and asking questions against.
* **Prompt Customization and Additional Context:** The prompts used are loaded from a YAML file, which also gives you the ability to add extra context.
* **SQL Generation:** Automatically generates SQL queries based on your questions.
* **Query Refinement:** Validates and debugs queries to ensure accuracy.
* **Natural Language Responses:** Run queries and present results in clear, easy-to-understand language.
* **Visualizations (Optional):** Explore data visually with generated charts.
* **Extensible:** Customize and integrate with your existing workflows(API, UI, Notebooks).
It is built on a modular design and currently supports the following components:
### Database Connectors
* **Google Cloud SQL for PostgreSQL**
* **Google BigQuery**
* **Google Firestore(for storing session logs)**
### Vector Stores
* **PGVector on Google Cloud SQL for PostgreSQL**
* **BigQuery Vector Store**
### Agents
* **BuildSQLAgent:** An agent specialized in generating SQL queries for BigQuery or PostgreSQL databases. It analyzes user questions, available table schemas, and column descriptions to construct syntactically and semantically correct SQL queries, adapting its process based on the target database type.
* **ValidateSQLAgent:** An agent that validates the syntax and semantic correctness of SQL queries. It uses a language model to analyze queries against a database schema and returns a JSON response indicating validity and potential errors.
* **DebugSQLAgent:** An agent designed to debug and refine SQL queries for BigQuery or PostgreSQL databases. It interacts with a chat-based language model to iteratively troubleshoot queries, using error messages to generate alternative, correct queries.
* **DescriptionAgent:** An agent specialized in generating descriptions for database tables and columns. It leverages a large language model to create concise and informative descriptions that aid in understanding data structures and facilitate SQL query generation.
* **EmbedderAgent:** An agent specialized in generating text embeddings using Large Language Models (LLMs). It supports direct interaction with Vertex AI's TextEmbeddingModel or uses LangChain's VertexAIEmbeddings for a simplified interface.
* **ResponseAgent:** An agent that generates natural language responses to user questions based on SQL query results. It acts as a data assistant, interpreting SQL results and transforming them into user-friendly answers using a language model.
* **VisualizeAgent:** An agent that generates JavaScript code for Google Charts based on user questions and SQL results. It suggests suitable chart types and constructs the JavaScript code to create visualizations of the data.
**Note:** the library was formerly named Talk2Data. You may still find artifacts with the old naming in this repository.
📏 Architecture
-------------
<p align="center">
<a href="utilities/imgs/OpenDataQnA_architecture.png">
<img src="utilities/imgs/OpenDataQnA_architecture.png" alt="aaie image">
</a>
</p>
A detailed description of the Architecture can be found [`here`](/docs/architecture.md) in the docs.
🧬 Repository Structure
-------------
Details on the Repository Structure can be found [`here`](/docs/repo_structure.md) in the docs.
<!-- 🏁 Getting Started: Quick Start
-------------
**NOTE: THIS IS STILL v1 - UPDATING THE STANDALONE NOTEBOOK TO v2 IS WIP**
**Quickstart with Open Data QnA: [Standalone BigQuery Notebook](/notebooks/(standalone)Run_OpenDataQnA.ipynb)**
Copy both [Standalone BigQuery Notebook](/notebooks/(standalone)Run_OpenDataQnA.ipynb) and [pyproject.toml](/pyproject.toml)
This notebook offers a streamlined way to experience the core functionality of Open Data QnA using BigQuery as both the data source and vector store. While it doesn't encompass the full flexibility of the repository setup, it's a perfect starting point to quickly test and explore the conversational querying capabilities of Open Data QnA with your own BigQuery datasets. -->
🏁 Getting Started: Main Repository
-------------
### Clone the repository and switch to the correct directory
git clone git@github.com:GoogleCloudPlatform/Open_Data_QnA.git
cd Open_Data_QnA
### 🚧 **Prerequisites**
Make sure that Google Cloud CLI and Python >= 3.10 are installed before moving ahead! You can refer to the link below for guidance
Installation Guide: https://cloud.google.com/sdk/docs/install
Download Python: https://www.python.org/downloads/
ℹ️ **You can setup this solution with three approaches. Choose one based on your requirements:**
- **A)** Using [Jupyter Notebooks](#a-jupyter-notebook-based-approach) (For better view at what is happening at each stage of the solution)
- **B)** Using [CLI](#b-command-line-interface-cli-based-approach) (For ease of use and running with simple python commands, without the need to understand every step of the solution)
- **C)** Using [terraform deployment](#c-using-terraform-to-deploy-the-solution) including your backend APIs with UI
### A) Jupyter Notebook Based Approach
#### 💻 **Install Code Dependencies (Create and setup venv)**
#### **All commands in this cell to be run on the terminal (typically Ctrl+Shift+`) where your notebooks are running**
Install the dependencies by running the poetry commands below
```
# Install poetry
pip uninstall poetry -y
pip install poetry --quiet
#Run the poetry commands below to set up the environment
poetry lock #resolve dependencies (also auto-creates the poetry venv if it does not exist)
poetry install --quiet #installs dependencies
poetry env info #Displays the env just created and the path to it
poetry shell #this command should activate your venv and you should see it enters into the venv
##inside the activated venv shell []
#If you are running on a Workbench instance where the service account used has the required permissions to run this solution, you can skip the gcloud auth commands below and go to the next kernel creation section
gcloud auth login # Use this or below command to authenticate
gcloud auth application-default login
gcloud services enable \
serviceusage.googleapis.com \
cloudresourcemanager.googleapis.com --project <<Enter Project Id>>
```
Choose the relevant instructions based on where you are running the notebook
**For IDEs like Cloud Shell Editor, VS Code**
For IDEs, adding Jupyter extensions will automatically give you the option to change the kernel. If not, manually select the Python interpreter in your IDE (the exact path is shown in the above cell; it would look like e.g. /home/admin_/opendata/.venv/bin/python or ~cache/user/opendataqna/.venv/bin/python)
Proceed to the Step 1 below
**For Jupyter Lab or Jupyter Environments on Workbench etc**
Create a kernel with the environment created
```
pip install jupyter
ipython kernel install --name "openqna-venv" --user
```
Restart your kernel or close the existing notebook and open it again; you should now see "openqna-venv" in the kernel dropdown
**What did we do here?**
* Created Application Default Credentials to use for the code
* Added venv to kernel to select for running the notebooks (For standalone Jupyter setups like Workbench etc)
#### 1. Run the [1_Setup_OpenDataQnA](/notebooks/1_Setup_OpenDataQnA.ipynb) (Run Once for Initial Setup)
This notebook guides you through the setup and execution of the Open Data QnA application. It provides comprehensive instructions for setting up the solution.
#### 2. Run the [2_Run_OpenDataQnA](/notebooks/2_Run_OpenDataQnA.ipynb)
This notebook guides you by reading the configuration you setup with [1_Setup_OpenDataQnA](/1_Setup_OpenDataQnA) and running the pipeline to answer questions about your data.
#### 3. [Loading Known Good SQL Examples](/notebooks/3_LoadKnownGoodSQL.ipynb)
In case you want to separately load Known Good SQLs please run this notebook once the config variables are setup in config.ini file. This can be run multiple times just to load the known good sql queries and create embeddings for it.
___________
### B) Command Line Interface (CLI) Based Approach
#### 1. Add Configuration values for the solution in [config.ini](/config.ini)
For setup we require details for vector store, source database etc. Edit the [config.ini](/config.ini) file and add values for the parameters based of below information.
ℹ️ Follow the guidelines from the [config guide document](/docs/config_guide.md) to populate your [config.ini](/config.ini) file.
**Sources to connect**
- This solution lets you set up multiple data sources at the same time.
- You can group multiple tables from different datasets or schema into a grouping and provide the details
- If your dataset/schema has many tables and you want to run the solution against only a few of them, you should create a grouping for just those tables
**Format for data_source_list.csv**
**source | user_grouping | schema | table**
**source** - Supported Data Sources. #Options: bigquery , cloudsql-pg
**user_grouping** - Logical grouping or use case name for tables from the same or different schema/dataset. When left blank it defaults to the schema value in the next column
**schema** - schema name for postgres or dataset name in bigquery
**table** - name of the tables to run the solutions against. Leave this column blank after filling schema/dataset if you want to run solution for whole dataset/schema
Update the [data_source_list.csv](/scripts/data_source_list.csv) according for your requirement.
Note that the source details filled in the csv should already be present. If not, please use the Copy Notebooks if you want the demo source setup.
Enabled Data Sources:
* PostgreSQL on Google Cloud SQL (Copy Sample Data: [0_CopyDataToCloudSqlPG.ipynb](0_CopyDataToCloudSqlPG.ipynb))
* BigQuery (Copy Sample Data: [0_CopyDataToBigQuery.ipynb](0_CopyDataToBigQuery.ipynb))
#### 2. Creating Virtual Environment and Install Dependencies
```
pip install poetry --quiet
poetry lock
poetry install --quiet
poetry env info
poetry shell
```
Authenticate your credentials
```
gcloud auth login
or
gcloud auth application-default login
```
```
gcloud services enable \
serviceusage.googleapis.com \
cloudresourcemanager.googleapis.com --project <<Enter Project Id>>
```
```
gcloud auth application-default set-quota-project <<Enter Project Id for using resources>>
```
Enable APIs for the solution setup
```
gcloud services enable \
cloudapis.googleapis.com \
compute.googleapis.com \
iam.googleapis.com \
run.googleapis.com \
sqladmin.googleapis.com \
aiplatform.googleapis.com \
bigquery.googleapis.com \
firestore.googleapis.com --project <<Enter Project Id>>
```
#### 3. Run [env_setup.py](/env_setup.py) to create vector store based on the configuration you did in Step 1
```
python env_setup.py
```
#### 4. Run [opendataqna.py](/opendataqna.py) to run the pipeline you just setup
The Open Data QnA SQL Generation tool can be conveniently used from your terminal or command prompt using a simple CLI interface. Here's how:
```
python opendataqna.py --session_id "122133131f--ade-eweq" --user_question "What is most 5 common genres we have?" --user_grouping "MovieExplorer-bigquery"
```
Where
*session_id* : Keep this unique per conversation; use the same id for follow-up questions.
*user_question* : Enter your question in string
*user_grouping* : Enter the BQ_DATASET_NAME for BigQuery sources or PG_SCHEMA for PostgreSQL sources (refer your [data_source_list.csv](/scripts/data_source_list.csv) file)
**Optional Parameters**
You can customize the pipeline's behavior using optional parameters. Here are some common examples:
```
# Enable the SQL debugger:
python opendataqna.py --session_id="..." --user_question "..." --user_grouping "..." --run_debugger
# Execute the final generated SQL:
python opendataqna.py --session_id="..." --user_question "..." --user_grouping "..." --execute_final_sql
# Change the number of debugging rounds:
python opendataqna.py --session_id="..." --user_question "..." --user_grouping "..." --debugging_rounds 5
# Adjust similarity thresholds:
python opendataqna.py --session_id="..." --user_question "..." --user_grouping "..." --table_similarity_threshold 0.25 --column_similarity_threshold 0.4
```
You can find a full list of available options and their descriptions by running:
```
python opendataqna.py --help
```
### C) Using Terraform to deploy the solution
The provided terraform streamlines the setup of this solution and serves as a blueprint for deployment. The script provides a one-click, one-time deployment option. However, it doesn't include CI/CD capabilities and is intended solely for initial setup.
> [!NOTE]
> Current version of the Terraform Google Cloud provider does not support deployment of a few resources, this solution uses null_resource to create those resources using Google Cloud SDK.
Prior to executing terraform, ensure that the below mentioned steps have been completed.
#### Data Sources Set Up
1. Source data should already be available. If you do not have readily available source data, use the notebooks [0_CopyDataToBigQuery.ipynb](/notebooks/0_CopyDataToBigQuery.ipynb) or [0_CopyDataToCloudSqlPG.ipynb](/notebooks/0_CopyDataToCloudSqlPG.ipynb) based on the preferred source to populate sample data.
2. Ensure that the [data_source_list.csv](/scripts/data_source_list.csv) is populated with the list of datasources to be used in this solution. Terraform will take care of creating the embeddings in the destination. Use [data_source_list_sample.csv](/scripts/data_source_list_sample.csv) to fill the [data_source_list.csv](/scripts/data_source_list.csv)
3. If you want to use known good sqls for few shot prompting, ensure that the [known_good_sql.csv](/scripts/known_good_sql.csv) is populated with the required data. Terraform will take care of creating the embeddings in the destination.
#### Enable Firebase
Firebase will be used to host the frontend of the application.
1. Go to https://console.firebase.google.com/
1. Select add project and load your Google Cloud Platform project
1. Add Firebase to one of your existing Google Cloud projects
1. Confirm Firebase billing plan
1. Continue and complete
#### Terraform deployment
> [!NOTE]
> Terraform apply command for this application uses gcloud config to fetch & pass the set project id to the scripts. Please ensure that gcloud config has been set to your intended project id before proceeding.
> [!IMPORTANT]
> The Terraform scripts require specific IAM permissions to function correctly. The user needs either the broad `roles/resourcemanager.projectIamAdmin` role or a custom role with tailored permissions to manage IAM policies and roles.
> Additionally, one script TEMPORARILY disables Domain Restricted Sharing Org Policies to enable the creation of a public endpoint. This requires the user to also have the `roles/orgpolicy.policyAdmin` role.
1. Install [terraform 1.7 or higher](https://developer.hashicorp.com/terraform/install).
1. [OPTIONAL] Update default values of variables in [variables.tf](/terraform/variables.tf) according to your preferences. You can find the description for each variable inside the file. This file will be used by terraform to get information about the resources it needs to deploy. If you do not update these, terraform will use the already specified default values in the file.
1. Move to the terraform directory in the terminal
```
cd Open_Data_QnA/terraform
#If you are running this outside Cloud Shell you need to set up your Google Cloud SDK Credentials
gcloud config set project <your_project_id>
gcloud auth application-default set-quota-project <your_project_id>
gcloud services enable \
serviceusage.googleapis.com \
cloudresourcemanager.googleapis.com --project <<Enter Project Id>>
sh ./scripts/deploy-all.sh
```
This script will perform the following steps:
1. **Run terraform scripts** - These terraform scripts will generate all the GCP resources and configuration files required for the frontend & backend. It will also generate embeddings and store it in the destination vector db.
1. **Deploy cloud run backend service with latest backend code** - The terraform in the previous step uses a dummy container image to deploy the initial version of cloud run service. This is the step where the actual backend code gets deployed.
1. **Deploy frontend app** - All the config files, web app etc required to create the frontend are deployed via terraform. However, the actual UI deployment takes place in this step.
### After deployment
***Auth Provider***
You need to enable at least one authentication provider in Firebase, you can enable it using the following steps:
1. Go to https://console.firebase.google.com/project/your_project_id/authentication/providers (change the `your_project_id` value)
2. Click on Get Started (if needed)
3. Select Google and enable it
4. Set the name for the project and support email for project
5. Save
This should deploy your end-to-end solution in the project with a Firebase web URL
For detailed steps and known issues refer to README.md under [`/terraform`](/terraform/)
🖥️ Build an Angular-based frontend for this solution
---------------------------------------------------
Deploy the backend APIs for the solution; refer to the README.md under [`/backend-apis`](/backend-apis/). These APIs are designed to work with the frontend and provide access to run the solution.
Once the backend APIs are deployed successfully, deploy the frontend for the solution; refer to the README.md under [`/frontend`](/frontend/).
📗 FAQs and Best Practices
-------------
If you successfully set up the solution accelerator and want to start optimizing to your needs, you can follow the tips in the [`Best Practice doc`](/docs/best_practices.md).
Additionally, if you stumble across any problems, take a look into the [`FAQ`](/docs/faq.md).
If neither of these resources helps, feel free to reach out to us directly by raising an Issue.
🧹 CleanUp Resources
-------------
To clean up the resources provisioned in this solution, use commands below to remove them using gcloud/bq:
For cloudsql-pgvector as vector store : [Delete SQL Instance](<https://cloud.google.com/sql/docs/mysql/delete-instance#delete-cloud-sql-instance>)
```
gcloud sql instances delete <CloudSQL Instance Name> -q
```
Delete BigQuery Dataset Created for Logs and Vector Store : [Remove BQ Dataset](<https://cloud.google.com/bigquery/docs/reference/bq-cli-reference#bq_rm>)
```
bq rm -r -f -d <BigQuery Dataset Name for OpenDataQnA>
```
(For Backend APIs)Remove the Cloud Run service : [Delete Service](<https://cloud.google.com/run/docs/managing/services#delete>)
```
gcloud run services delete <Cloud Run Service Name>
```
For frontend, based on firebase: [Remove the firebase app](<https://support.google.com/firebase/answer/7047853?sjid=6757651181596811904-AP#how-to-remove>)
📄 Documentation
-------------
* [Open Data QnA Source Code (GitHub)](<https://github.com/GoogleCloudPlatform/Open_Data_QnA>)
* [Open Data QnA usage notebooks](/notebooks)
* [`Architecture`](/docs/architecture.md)
* [`FAQ`](/docs/faq.md)
* [`Best Practice doc`](/docs/best_practices.md)
🚧 Quotas and limits
------------------
[BigQuery quotas](<https://cloud.google.com/bigquery/quotas>) including hardware, software, and network components.
[Gemini quotas](<https://cloud.google.com/gemini/docs/quotas>).
🪪 License
-------
Open Data QnA is distributed with the [Apache-2.0 license](<LICENSE>).
It also contains code derived from the following third-party packages:
* [pandas](<https://pandas.pydata.org/>)
* [Python](<https://www.python.org/>)
🧪 Disclaimer
----------
This repository provides an open-source solution accelerator designed to streamline your development process. Please be aware that all resources associated with this accelerator will be deployed within your own Google Cloud Platform (GCP) instances.
It is imperative that you thoroughly test all components and configurations in a non-production environment before integrating any part of this accelerator with your production data or systems.
While we strive to provide a secure and reliable solution, we cannot be held responsible for any data loss, service disruptions, or other issues that may arise from the use of this accelerator.
By utilizing this repository, you acknowledge that you are solely responsible for the deployment, management, and security of the resources deployed within your GCP environment.
If you encounter any issues or have concerns about potential risks, please refrain from using this accelerator in a production setting.
We encourage responsible and informed use of this open-source solution.
🙋 Getting Help
----------
If you have any questions or if you found any problems with this repository, please report through GitHub issues.
================================================
FILE: SECURITY.md
================================================
# Security Policy
To report a security issue, please use [g.co/vulnz](https://g.co/vulnz).
The Google Security Team will respond within 5 working days of your report on g.co/vulnz.
We use g.co/vulnz for our intake, and do coordination and disclosure here using GitHub Security Advisory to privately discuss and fix the issue.
================================================
FILE: agents/BuildSQLAgent.py
================================================
from abc import ABC
from vertexai.language_models import CodeChatModel
from vertexai.generative_models import GenerativeModel, Content, Part, GenerationConfig
from .core import Agent
import pandas as pd
import json
from datetime import datetime
from dbconnectors import pgconnector,bqconnector,firestoreconnector
from utilities import PROMPTS, format_prompt
from google.cloud.aiplatform import telemetry
import vertexai
from utilities import PROJECT_ID, PG_REGION
from vertexai.generative_models import GenerationConfig
vertexai.init(project=PROJECT_ID, location=PG_REGION)
class BuildSQLAgent(Agent, ABC):
    """Agent that generates SQL for a user question against BigQuery or PostgreSQL.

    Builds a context prompt from schema metadata, use-case context and
    known-good SQL examples, replays the session chat history into the model,
    and asks the model to produce SQL for the (possibly follow-up) question.
    """

    # Fixed tag used by callers to identify this agent type.
    agentType: str = "BuildSQLAgent"

    def __init__(self, model_id='gemini-1.5-pro'):
        super().__init__(model_id=model_id)

    def build_sql(self, source_type, user_grouping, user_question, session_history,
                  tables_schema, columns_schema, similar_sql,
                  max_output_tokens=2048, temperature=0.4, top_p=1, top_k=32):
        """Generate a SQL statement answering `user_question`.

        Args:
            source_type: "bigquery" or the PostgreSQL source type.
            user_grouping: Logical grouping/use-case name used to look up
                extra prompt context in PROMPTS.
            user_question: The natural-language question to translate to SQL.
            session_history: Iterable of prior turns, each a dict with
                "user_question" and "bot_response" keys (may be empty/None).
            tables_schema: Description of the candidate tables.
            columns_schema: Detailed column descriptions.
            similar_sql: Known-good SQL examples for few-shot prompting.
            max_output_tokens, temperature, top_p, top_k: Generation tuning
                parameters applied to the Gemini request.

        Returns:
            str: The generated SQL with markdown code fences stripped.

        Raises:
            ValueError: If the configured model id is not supported.
        """
        not_related_msg = f'''select 'Question is not related to the dataset' as unrelated_answer;'''

        # Pick the dialect-specific data-type hints for the prompt.
        if source_type == 'bigquery':
            from dbconnectors import bq_specific_data_types
            specific_data_types = bq_specific_data_types()
        else:
            from dbconnectors import pg_specific_data_types
            specific_data_types = pg_specific_data_types()

        # Optional per-use-case context, keyed by source and grouping.
        if f'usecase_{source_type}_{user_grouping}' in PROMPTS:
            usecase_context = PROMPTS[f'usecase_{source_type}_{user_grouping}']
        else:
            usecase_context = "No extra context for the usecase is provided"

        context_prompt = PROMPTS[f'buildsql_{source_type}']
        context_prompt = format_prompt(context_prompt,
                                       specific_data_types=specific_data_types,
                                       not_related_msg=not_related_msg,
                                       usecase_context=usecase_context,
                                       similar_sql=similar_sql,
                                       tables_schema=tables_schema,
                                       columns_schema=columns_schema)

        # Rebuild the model-visible chat history from the stored session turns.
        chat_history = []
        for entry in session_history:
            user_message = Content(
                parts=[Part.from_text(entry["user_question"])],
                role="user"
            )
            # Gemini chat history requires the counterpart role to be "model";
            # "assistant" is rejected by the vertexai Content validation.
            bot_message = Content(
                parts=[Part.from_text(entry["bot_response"])],
                role="model"
            )
            chat_history.extend([user_message, bot_message])

        generation_config = None
        if self.model_id == 'codechat-bison-32k':
            with telemetry.tool_context_manager('opendataqna-buildsql-v2'):
                chat_session = self.model.start_chat(context=context_prompt)
        elif 'gemini' in self.model_id:
            with telemetry.tool_context_manager('opendataqna-buildsql-v2'):
                # Apply the caller-supplied tuning parameters to the request
                # (previously constructed but never passed to the model).
                generation_config = GenerationConfig(
                    max_output_tokens=max_output_tokens, temperature=temperature,
                    top_p=top_p, top_k=top_k
                )
                chat_session = self.model.start_chat(history=chat_history,
                                                     response_validation=False)
                chat_session.send_message(context_prompt)
        else:
            raise ValueError('Invalid Model Specified')

        if not session_history:
            concated_questions = None
            re_written_qe = None
            previous_question = None
            previous_sql = None
        else:
            concated_questions, re_written_qe = self.rewrite_question(user_question, session_history)
            previous_question, previous_sql = self.get_last_sql(session_history)

        build_context_prompt = f"""
        Below is the previous user question from this conversation and its generated sql.

        Previous Question: {previous_question}

        Previous Generated SQL : {previous_sql}

        Respond with

        Generate SQL for User Question : {user_question}
        """

        with telemetry.tool_context_manager('opendataqna-buildsql-v2'):
            if generation_config is not None:
                response = chat_session.send_message(build_context_prompt, stream=False,
                                                     generation_config=generation_config)
            else:
                response = chat_session.send_message(build_context_prompt, stream=False)

        # Strip markdown SQL fences from the model output.
        generated_sql = str(response.text).replace("```sql", "").replace("```", "")
        return generated_sql

    def rewrite_question(self, question, session_history):
        """Rewrite a follow-up question into a self-contained one.

        Uses the prior turns (question + generated SQL) so the rewritten
        question needs no additional conversational context.

        Returns:
            Tuple[str, str]: (all prior questions concatenated, rewritten question).
        """
        formatted_history = ''
        concat_questions = ''
        for i, _row in enumerate(session_history, start=1):
            user_question = _row['user_question']
            sql_query = _row['bot_response']
            formatted_history += f"User Question - Turn :: {i} : {user_question}\n"
            formatted_history += f"Generated SQL - Turn :: {i}: {sql_query}\n\n"
            concat_questions += f"{user_question} "

        context_prompt = f"""
        Your main objective is to rewrite and refine the question passed based on the session history of question and sql generated.

        Refine the given question using the provided session history to produce a queryable statement. The refined question should be self-contained, requiring no additional context for accurate SQL generation.

        Make sure all the information is included in the re-written question

        Below is the previous session history:

        {formatted_history}

        Question to rewrite:

        {question}
        """

        re_written_qe = str(self.generate_llm_response(context_prompt))
        print("*" * 25 + "Re-written question for the follow up:: " + "*" * 25 + "\n" + str(re_written_qe))
        return str(concat_questions), str(re_written_qe)

    def get_last_sql(self, session_history):
        """Return (user_question, bot_response) of the latest turn with a response.

        Returns (None, None) when no turn has a bot_response, so callers can
        always unpack two values (previously returned a bare None, which broke
        tuple unpacking at the call site).
        """
        for entry in reversed(session_history):
            if entry.get("bot_response"):
                return entry["user_question"], entry["bot_response"]
        return None, None
================================================
FILE: agents/DebugSQLAgent.py
================================================
from abc import ABC
import vertexai
from vertexai.language_models import CodeChatModel
from vertexai.generative_models import GenerativeModel,GenerationConfig
from google.cloud.aiplatform import telemetry
from dbconnectors import pgconnector, bqconnector
from utilities import PROMPTS, format_prompt
from .core import Agent
import pandas as pd
import json
from utilities import PROJECT_ID, PG_REGION
vertexai.init(project=PROJECT_ID, location=PG_REGION)
class DebugSQLAgent(Agent, ABC):
    """
    An agent designed to debug and refine SQL queries for BigQuery or PostgreSQL databases.

    This agent interacts with a chat-based language model (CodeChat or Gemini) to iteratively
    troubleshoot SQL queries. It receives feedback in the form of error messages and uses the
    model's capabilities to generate alternative queries that address the identified issues,
    while striving to maintain the original intent of the query.

    Attributes:
        agentType (str): Indicates the type of agent, fixed as "DebugSQLAgent".

    Methods:
        init_chat(source_type, user_grouping, tables_schema, columns_schema, similar_sql) -> ChatSession:
            Initializes a chat session with the chosen chat model.

        rewrite_sql_chat(chat_session, sql, question, error_df) -> str:
            Generates an alternative SQL query based on the chat session, original query,
            and error message.

        start_debugger(...) -> Tuple[str, bool, str]:
            Iteratively validates and rewrites a query until it passes a dry-run /
            explain-plan execution or the round limit is reached.
            Defaults: similar_sql="-No examples provided..-", DEBUGGING_ROUNDS=2,
            LLM_VALIDATION=False.
            Returns:
                - The final refined SQL query (or the original if unchanged).
                - A boolean indicating if the final query is considered invalid.
                - The updated AUDIT_TEXT with debugging steps.
    """

    # Fixed tag used by callers to identify this agent type.
    agentType: str = "DebugSQLAgent"

    def __init__(self, model_id='gemini-1.5-pro'):
        super().__init__(model_id=model_id)

    def init_chat(self, source_type, user_grouping, tables_schema, columns_schema,
                  similar_sql="-No examples provided..-"):
        """Start a chat session seeded with the debug prompt for this source type.

        Raises:
            ValueError: If the configured model id is not supported.
        """
        # Optional per-use-case context, keyed by source and grouping.
        if f'usecase_{source_type}_{user_grouping}' in PROMPTS:
            usecase_context = PROMPTS[f'usecase_{source_type}_{user_grouping}']
        else:
            usecase_context = "No extra context for the usecase is provided"

        context_prompt = PROMPTS[f'debugsql_{source_type}']
        context_prompt = format_prompt(context_prompt,
                                       usecase_context=usecase_context,
                                       similar_sql=similar_sql,
                                       tables_schema=tables_schema,
                                       columns_schema=columns_schema)

        if self.model_id == 'codechat-bison-32k':
            with telemetry.tool_context_manager('opendataqna-debugsql-v2'):
                chat_session = self.model.start_chat(context=context_prompt)
        elif 'gemini' in self.model_id:
            with telemetry.tool_context_manager('opendataqna-debugsql-v2'):
                chat_session = self.model.start_chat(response_validation=False)
                chat_session.send_message(context_prompt)
        else:
            raise ValueError('Invalid Chat Model Specified')
        return chat_session

    def rewrite_sql_chat(self, chat_session, sql, question, error_df):
        """Ask the chat model for an alternative SQL that fixes `error_df`.

        Returns the rewritten SQL with markdown code fences stripped.

        Raises:
            ValueError: If the configured model id is not supported.
        """
        context_prompt = f"""
            What is an alternative SQL statement to address the error mentioned below?
            Present a different SQL from previous ones. It is important that the query still answer the original question.
            All columns selected must be present on tables mentioned on the join section.
            Avoid repeating suggestions.

            <Original SQL>
            {sql}
            </Original SQL>

            <Original Question>
            {question}
            </Original Question>

            <Error Message>
            {error_df}
            </Error Message>
            """

        if self.model_id == 'codechat-bison-32k':
            with telemetry.tool_context_manager('opendataqna-debugsql-v2'):
                response = chat_session.send_message(context_prompt)
                resp_return = (str(response.candidates[0])).replace("```sql", "").replace("```", "")
        elif 'gemini' in self.model_id:
            with telemetry.tool_context_manager('opendataqna-debugsql-v2'):
                response = chat_session.send_message(context_prompt, stream=False)
                resp_return = (str(response.text)).replace("```sql", "").replace("```", "")
        else:
            raise ValueError('Invalid Model Id')

        return resp_return

    def start_debugger(self,
                       source_type,
                       user_grouping,
                       query,
                       user_question,
                       SQLChecker,
                       tables_schema,
                       columns_schema,
                       AUDIT_TEXT,
                       similar_sql="-No examples provided..-",
                       DEBUGGING_ROUNDS=2,
                       LLM_VALIDATION=False):
        """Iteratively validate and rewrite `query` until it passes a dry run.

        Each round optionally runs LLM syntax validation, then a dry-run /
        explain-plan execution against the source database; on failure the
        chat model is asked for a rewritten query. Stops on success or after
        DEBUGGING_ROUNDS rewrites.

        Returns:
            Tuple[str, bool, str]: (final SQL, whether it may still be invalid,
            updated AUDIT_TEXT).
        """
        i = 0
        STOP = False
        invalid_response = False
        chat_session = self.init_chat(source_type, user_grouping, tables_schema, columns_schema, similar_sql)
        sql = query.replace("```sql", "").replace("```", "").replace("EXPLAIN ANALYZE ", "")

        AUDIT_TEXT = AUDIT_TEXT + "\n\nEntering the debugging steps!"
        while not STOP:
            # Assume valid unless a validation step says otherwise.
            json_syntax_result = {"valid": True, "errors": "None"}

            if LLM_VALIDATION:
                json_syntax_result = SQLChecker.check(source_type, user_question, tables_schema, columns_schema, sql)
            else:
                json_syntax_result['valid'] = True
                AUDIT_TEXT = AUDIT_TEXT + "\nLLM Validation is deactivated. Jumping directly to dry run execution."

            if json_syntax_result['valid'] is True:
                AUDIT_TEXT = AUDIT_TEXT + "\nGenerated SQL is syntactically correct as per LLM Validation!"
                connector = bqconnector if source_type == 'bigquery' else pgconnector

                correct_sql, exec_result_df = connector.test_sql_plan_execution(sql)
                if not correct_sql:
                    AUDIT_TEXT = AUDIT_TEXT + "\nGenerated SQL failed on execution! Here is the feedback from bigquery dryrun/ explain plan: \n" + str(exec_result_df)
                    rewrite_result = self.rewrite_sql_chat(chat_session, sql, user_question, exec_result_df)
                    print('\n Rewritten and Cleaned SQL: ' + str(rewrite_result))
                    # BUG FIX: previously appended the literal text
                    # "' + str({rewrite_result})" instead of the rewritten SQL.
                    AUDIT_TEXT = AUDIT_TEXT + '\nRewritten and Cleaned SQL: \n' + str(rewrite_result)
                    sql = str(rewrite_result).replace("```sql", "").replace("```", "").replace("EXPLAIN ANALYZE ", "")
                else:
                    STOP = True
            else:
                print(f'\nGenerated query failed on syntax check as per LLM Validation!\nError Message from LLM: {json_syntax_result} \nRewriting the query...')
                AUDIT_TEXT = AUDIT_TEXT + '\nGenerated query failed on syntax check as per LLM Validation! \nError Message from LLM: ' + str(json_syntax_result) + '\nRewriting the query...'

                syntax_err_df = pd.read_json(json.dumps(json_syntax_result))
                rewrite_result = self.rewrite_sql_chat(chat_session, sql, user_question, syntax_err_df)
                print(rewrite_result)
                AUDIT_TEXT = AUDIT_TEXT + '\n Rewritten SQL: ' + str(rewrite_result)
                sql = str(rewrite_result).replace("```sql", "").replace("```", "").replace("EXPLAIN ANALYZE ", "")

            i += 1
            if i > DEBUGGING_ROUNDS:
                AUDIT_TEXT = AUDIT_TEXT + "Exceeded the number of iterations for correction!"
                AUDIT_TEXT = AUDIT_TEXT + "The generated SQL can be invalid!"
                STOP = True
                invalid_response = True

        # The round-limit check inside the loop already sets invalid_response
        # whenever i exceeds DEBUGGING_ROUNDS, so no post-loop re-check is needed.
        return sql, invalid_response, AUDIT_TEXT
================================================
FILE: agents/DescriptionAgent.py
================================================
from abc import ABC
from .core import Agent
class DescriptionAgent(Agent, ABC):
    """
    An agent specialized in generating descriptions for database tables and columns.

    This agent leverages a large language model to create concise and informative
    descriptions that aid in understanding the structure and content of database
    elements. The generated descriptions can be valuable for documenting schemas,
    enhancing data exploration, and facilitating SQL query generation.

    Attributes:
        agentType (str): Indicates the type of agent, fixed as "DescriptionAgent".
    """

    agentType: str = "DescriptionAgent"

    def generate_llm_response(self, prompt):
        """Generates a response from the underlying LLM, stripped of SQL code fences.

        Args:
            prompt (str): The prompt to feed into the language model.

        Returns:
            str: The generated text, with any ```sql / ``` fences removed.
        """
        context_query = self.model.generate_content(prompt, safety_settings=self.safety_settings, stream=False)
        return str(context_query.candidates[0].text).replace("```sql", "").replace("```", "")

    def _table_prompt(self, source, row, q, table_desc_df, column_name_df):
        """Builds the description-generation prompt for a single table row.

        BigQuery tables are fully qualified with the project id; other sources
        use schema.table only. (Spelling fixed vs. the original prompt text:
        "desciprtion" -> "description", "quries" -> "queries".)
        """
        if source == 'bigquery':
            table_ref = f"{row['project_id']}.{row['table_schema']}.{row['table_name']}"
        else:
            table_ref = f"{row['table_schema']}.{row['table_name']}"
        return f"""
        Generate short and crisp description for the table {table_ref}
        Remember that this description should help LLMs to help build better SQL for any queries related to this table.

        Parameters:
        - column metadata: {column_name_df.query(q).to_markdown(index=False)}
        - table metadata: {table_desc_df.query(q).to_markdown(index=False)}

        DO NOT generate description that is more than two lines
        """

    def _column_prompt(self, source, row, q, table_desc_df):
        """Builds the description-generation prompt for a single column row.

        (Spelling fixed vs. the original prompt text: "Contrainst" -> "Constraints".)
        """
        if source == 'bigquery':
            column_ref = f"{row['project_id']}.{row['table_schema']}.{row['table_name']}.{row['column_name']}"
        else:
            column_ref = f"{row['table_schema']}.{row['table_name']}.{row['column_name']}"
        return f"""
        Generate short and crisp description for the column {column_ref}
        Remember that this description should help LLMs to help generate better SQL for any queries related to these columns.

        Consider the below information while generating the description

        Name of the column : {row['column_name']}
        Data type of the column is : {row['data_type']}
        Details of the table of this column are below:
        {table_desc_df.query(q).to_markdown(index=False)}
        Column Constraints of this column are : {row['column_constraints']}

        DO NOT generate description that is more than two lines
        """

    def generate_missing_descriptions(self, source, table_desc_df, column_name_df):
        """Fills in missing table and column descriptions using the LLM.

        Args:
            source (str): Source of the schema; 'bigquery' prompts include the project id.
            table_desc_df (pd.DataFrame): Table metadata with potentially missing descriptions.
            column_name_df (pd.DataFrame): Column metadata with potentially missing descriptions.

        Returns:
            Tuple[pd.DataFrame, pd.DataFrame]: The updated table and column DataFrames
            (also mutated in place via `.at`).
        """
        llm_generated = 0
        print("\n\n")
        for index, row in table_desc_df.iterrows():
            # NOTE(review): tables treat 'NA' as the missing sentinel while
            # columns below treat '' as missing — preserved as-is; confirm
            # whether both sentinels should apply to both loops.
            if row['table_description'] is None or row['table_description'] == 'NA':
                q = f"table_name == '{row['table_name']}' and table_schema == '{row['table_schema']}'"
                context_prompt = self._table_prompt(source, row, q, table_desc_df, column_name_df)
                table_desc_df.at[index, 'table_description'] = self.generate_llm_response(context_prompt)
                print(f"Generated table description for {row['table_schema']}.{row['table_name']}")
                llm_generated += 1
        print("LLM generated " + str(llm_generated) + " Table Descriptions")

        llm_generated = 0
        print("\n\n")
        for index, row in column_name_df.iterrows():
            if row['column_description'] is None or row['column_description'] == '':
                q = f"table_name == '{row['table_name']}' and table_schema == '{row['table_schema']}'"
                context_prompt = self._column_prompt(source, row, q, table_desc_df)
                column_name_df.at[index, 'column_description'] = self.generate_llm_response(prompt=context_prompt)
                print(f"Generated column description for {row['table_schema']}.{row['table_name']}.{row['column_name']}")
                llm_generated += 1
        print("LLM generated " + str(llm_generated) + " Column Descriptions")
        return table_desc_df, column_name_df
================================================
FILE: agents/EmbedderAgent.py
================================================
from abc import ABC
from .core import Agent
from vertexai.language_models import TextEmbeddingModel
class EmbedderAgent(Agent, ABC):
    """
    An agent specialized in generating text embeddings using Large Language Models (LLMs).

    Two modes are supported:
    1. "vertex": directly uses the Vertex AI TextEmbeddingModel.
    2. "lang-vertex": uses LangChain's VertexAIEmbeddings.

    Attributes:
        agentType (str): Indicates the type of agent, fixed as "EmbedderAgent".
        mode (str): The embedding generation mode ("vertex" or "lang-vertex").
        model: The underlying embedding model instance.
    """

    agentType: str = "EmbedderAgent"

    def __init__(self, mode, embeddings_model='text-embedding-004'):
        """
        Args:
            mode (str): "vertex" or "lang-vertex".
            embeddings_model (str): Vertex AI embedding model name (vertex mode only).

        Raises:
            ValueError: If `mode` is not "vertex" or "lang-vertex".
        """
        if mode == 'vertex':
            self.mode = mode
            self.model = TextEmbeddingModel.from_pretrained(embeddings_model)
        elif mode == 'lang-vertex':
            self.mode = mode
            # Imported lazily so the langchain dependency is only required
            # when this mode is actually requested.
            from langchain.embeddings import VertexAIEmbeddings
            self.model = VertexAIEmbeddings()
        else:
            raise ValueError('EmbedderAgent mode must be either vertex or lang-vertex')

    def create(self, question):
        """Text embedding with a Large Language Model.

        Args:
            question (str or list): A single string or a list of strings to embed.

        Returns:
            list: For a str input, one embedding vector (list of floats);
            for a list input, a list of embedding vectors.

        Raises:
            ValueError: If `question` is neither a str nor a list (vertex mode).
        """
        if self.mode == 'vertex':
            if isinstance(question, str):
                embeddings = self.model.get_embeddings([question])
                for embedding in embeddings:
                    vector = embedding.values
                return vector
            elif isinstance(question, list):
                vector = []
                for q in question:
                    for embedding in self.model.get_embeddings([q]):
                        vector.append(embedding.values)
                return vector
            else:
                raise ValueError('Input must be either str or list')
        elif self.mode == 'lang-vertex':
            # BUG FIX: previously called self.embeddings_service, which is
            # never assigned anywhere (__init__ stores the LangChain model in
            # self.model), so this branch always raised AttributeError.
            return self.model.embed_documents(question)
================================================
FILE: agents/ResponseAgent.py
================================================
import json
from abc import ABC
from .core import Agent
from utilities import PROMPTS, format_prompt
from vertexai.generative_models import HarmCategory, HarmBlockThreshold
from google.cloud.aiplatform import telemetry
import vertexai
from utilities import PROJECT_ID, PG_REGION
vertexai.init(project=PROJECT_ID, location=PG_REGION)
class ResponseAgent(Agent, ABC):
    """
    An agent that turns SQL query results into a natural language answer.

    Acting as a data assistant, it interprets the result of an executed SQL
    query and uses the configured language model to phrase a user-friendly
    reply to the original question.

    Attributes:
        agentType (str): Indicates the type of agent, fixed as "ResponseAgent".
    """

    agentType: str = "ResponseAgent"

    def run(self, user_question, sql_result):
        """Generate a natural language response for `user_question` given `sql_result`.

        Args:
            user_question (str): The user's original natural-language question.
            sql_result (str): The result of the SQL query that answers it.

        Returns:
            str: The model-generated natural language response.
        """
        prompt = format_prompt(
            PROMPTS['nl_reponse'],
            user_question=user_question,
            sql_result=sql_result,
        )
        # print(f"Prompt for Natural Language Response: \n{prompt}")
        with telemetry.tool_context_manager('opendataqna-response-v2'):
            if 'gemini' in self.model_id:
                result = self.model.generate_content(prompt, safety_settings=self.safety_settings, stream=False)
                answer = str(result.candidates[0].text)
            else:
                result = self.model.predict(prompt, max_output_tokens=8000, temperature=0)
                answer = str(result.candidates[0])
        return answer
================================================
FILE: agents/ValidateSQLAgent.py
================================================
import json
from abc import ABC
from .core import Agent
from utilities import PROMPTS, format_prompt
class ValidateSQLAgent(Agent, ABC):
    """
    An agent that validates the syntax and semantic correctness of SQL queries.

    It asks the configured language model to review a generated SQL query
    against the supplied table and column schemas, and parses the model's
    JSON verdict.

    Attributes:
        agentType (str): Indicates the type of agent, fixed as "ValidateSQLAgent".
    """

    agentType: str = "ValidateSQLAgent"

    def check(self, source_type, user_question, tables_schema, columns_schema, generated_sql):
        """Assess an SQL query's validity and describe any problems found.

        Args:
            source_type (str): The database source type for the prompt.
            user_question (str): The original user question (context only).
            tables_schema (str): Description of tables and relationships.
            columns_schema (str): Detailed column descriptions.
            generated_sql (str): The SQL query to validate.

        Returns:
            dict: Parsed JSON with keys "valid" (bool) and "errors" (str).
        """
        prompt = format_prompt(
            PROMPTS['validatesql'],
            source_type=source_type,
            user_question=user_question,
            tables_schema=tables_schema,
            columns_schema=columns_schema,
            generated_sql=generated_sql,
        )
        # print(f"Prompt to Validate SQL after formatting: \n{prompt}")
        if "gemini" in self.model_id:
            raw_verdict = str(self.model.generate_content(prompt, stream=False).candidates[0].text)
        else:
            raw_verdict = str(self.model.predict(prompt, max_output_tokens=8000, temperature=0).candidates[0])
        # Strip any markdown fences before parsing the JSON verdict.
        return json.loads(raw_verdict.replace("```json", "").replace("```", ""))
================================================
FILE: agents/VisualizeAgent.py
================================================
#This agent generates google charts code for displaying charts on web application
#Generates two charts with elements "chart-div" and "chart-div-1"
#Code is in javascript
from abc import ABC
from vertexai.language_models import CodeChatModel
from vertexai.generative_models import GenerativeModel,HarmCategory,HarmBlockThreshold
from .core import Agent
from utilities import PROMPTS, format_prompt
from agents import ValidateSQLAgent
import pandas as pd
import json
from google.cloud.aiplatform import telemetry
import vertexai
from utilities import PROJECT_ID, PG_REGION
vertexai.init(project=PROJECT_ID, location=PG_REGION)
class VisualizeAgent(Agent, ABC):
    """
    An agent that generates JavaScript code for Google Charts based on user
    questions and SQL results.

    It first asks the model to suggest two suitable chart types, then asks it
    to generate the corresponding Google Charts JavaScript for the HTML
    elements "chart_div" and "chart_div_1".

    Attributes:
        agentType (str): Indicates the type of agent, fixed as "VisualizeAgent".
        model_id (str): The ID of the language model used.
        model: The language model instance.
    """

    agentType: str = "VisualizeAgent"

    def __init__(self):
        self.model_id = 'gemini-1.5-pro'
        self.model = GenerativeModel("gemini-1.5-pro-001")

    @staticmethod
    def _strip_markup(text):
        """Removes code fences and stray language tags from a generated snippet.

        Preserves the behavior of the original chained str.replace calls
        (tokens are removed anywhere they occur, in this order).
        """
        for token in ("```json", "```", "json", "js", "python", "javascript"):
            text = text.replace(token, "")
        return text

    def getChartType(self, user_question, generated_sql):
        """Suggest the two most suitable chart types for the question/SQL pair.

        Returns:
            str: A JSON string with keys "chart_1" and "chart_2".
        """
        chart_type_prompt = PROMPTS['visualize_chart_type']
        chart_type_prompt = format_prompt(chart_type_prompt,
                                          user_question=user_question,
                                          generated_sql=generated_sql)
        chart_type = self.model.generate_content(chart_type_prompt, stream=False).candidates[0].text
        # Simplified from the original replace chain: once "\n" and "```" are
        # removed, the "```html", "js\n", "json\n" and "python\n" replacements
        # could never match, so dropping them preserves behavior exactly.
        return (chart_type.replace("\n", "")
                          .replace("```", "")
                          .replace("json", "")
                          .replace("javascript", ""))

    def getChartPrompt(self, user_question, generated_sql, chart_type, chart_div, sql_results):
        """Build the prompt that asks the model for chart JavaScript.

        Args:
            user_question (str): The user's question.
            generated_sql (str): The generated SQL query.
            chart_type (str): Desired chart type (e.g. "Bar Chart").
            chart_div (str): Target HTML element ID for the chart.
            sql_results (str): SQL results in JSON format.

        Returns:
            str: The formatted prompt.
        """
        chart_prompt = PROMPTS['visualize_generate_chart_code']
        chart_prompt = format_prompt(chart_prompt,
                                     user_question=user_question,
                                     generated_sql=generated_sql,
                                     chart_type=chart_type,
                                     chart_div=chart_div,
                                     sql_results=sql_results)
        # print(f"Prompt to generate code for google charts visualization after formatting: \n{chart_prompt}")
        return chart_prompt

    def generate_charts(self, user_question, generated_sql, sql_results):
        """Generate Google Charts JavaScript for two suggested chart types.

        Returns:
            dict: {"chart_div": <js code>, "chart_div_1": <js code>}.
        """
        chart_type = self.getChartType(user_question, generated_sql)
        chart_json = json.loads(chart_type)
        chart_list = [chart_json['chart_1'], chart_json['chart_2']]
        print("Charts Suggested : " + str(chart_list))
        context_prompt = self.getChartPrompt(user_question, generated_sql, chart_list[0], "chart_div", sql_results)
        context_prompt_1 = self.getChartPrompt(user_question, generated_sql, chart_list[1], "chart_div_1", sql_results)
        # BUG FIX: the original annotated this local as `Optional[dict]`, but
        # `Optional` is never imported in this module; the annotation is dropped.
        safety_settings = {
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
        }
        with telemetry.tool_context_manager('opendataqna-visualize-v2'):
            context_query = self.model.generate_content(context_prompt, safety_settings=safety_settings, stream=False)
            context_query_1 = self.model.generate_content(context_prompt_1, safety_settings=safety_settings, stream=False)
        return {
            "chart_div": self._strip_markup(context_query.candidates[0].text),
            "chart_div_1": self._strip_markup(context_query_1.candidates[0].text),
        }
================================================
FILE: agents/__init__.py
================================================
from .BuildSQLAgent import BuildSQLAgent
from .ValidateSQLAgent import ValidateSQLAgent
from .DebugSQLAgent import DebugSQLAgent
from .EmbedderAgent import EmbedderAgent
from .ResponseAgent import ResponseAgent
from .VisualizeAgent import VisualizeAgent
from .DescriptionAgent import DescriptionAgent
__all__ = ["BuildSQLAgent", "ValidateSQLAgent", "DebugSQLAgent", "EmbedderAgent", "ResponseAgent","VisualizeAgent", "DescriptionAgent"]
================================================
FILE: agents/core.py
================================================
"""
Provides the base class for all Agents
"""
from abc import ABC
import vertexai
from google.cloud.aiplatform import telemetry
from vertexai.language_models import TextGenerationModel
from vertexai.language_models import CodeGenerationModel
from vertexai.language_models import CodeChatModel
from vertexai.generative_models import GenerativeModel
from vertexai.generative_models import HarmCategory,HarmBlockThreshold
from utilities import PROJECT_ID, PG_REGION
vertexai.init(project=PROJECT_ID, location=PG_REGION)
class Agent(ABC):
    """
    The core class for all Agents.

    Instantiates the underlying Vertex AI model for a given model id and
    provides shared helpers for generating LLM responses and rewriting
    follow-up questions into standalone ones.
    """

    agentType: str = "Agent"

    # Public model ids mapped to the pinned Gemini model versions they load.
    _GEMINI_MODELS = {
        'gemini-1.0-pro': 'gemini-1.0-pro-001',
        'gemini-1.5-flash': 'gemini-1.5-flash-preview-0514',
        'gemini-1.5-pro': 'gemini-1.5-pro-001',
    }

    def __init__(self, model_id: str):
        """
        Args:
            model_id (str): The model id used for initialization. One of
                'code-bison-32k', 'text-bison-32k', 'codechat-bison-32k',
                'gemini-1.0-pro', 'gemini-1.5-flash', 'gemini-1.5-pro'.

        Raises:
            ValueError: If `model_id` is not one of the supported models.
        """
        self.model_id = model_id
        if model_id == 'code-bison-32k':
            with telemetry.tool_context_manager('opendataqna'):
                self.model = CodeGenerationModel.from_pretrained('code-bison-32k')
        elif model_id == 'text-bison-32k':
            with telemetry.tool_context_manager('opendataqna'):
                self.model = TextGenerationModel.from_pretrained('text-bison-32k')
        elif model_id == 'codechat-bison-32k':
            with telemetry.tool_context_manager('opendataqna'):
                self.model = CodeChatModel.from_pretrained("codechat-bison-32k")
        elif model_id in self._GEMINI_MODELS:
            # All Gemini variants share the same setup; the original repeated
            # this block (and an identical safety-settings dict) three times.
            with telemetry.tool_context_manager('opendataqna'):
                self.model = GenerativeModel(self._GEMINI_MODELS[model_id])
                # BUG FIX: the original annotated this as `Optional[dict]`,
                # but `Optional` is never imported in this module; the
                # annotation is dropped.
                self.safety_settings = {
                    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                }
        else:
            raise ValueError("Please specify a compatible model.")

    def generate_llm_response(self, prompt):
        """Generate text from the model, stripped of SQL fences and trailing newlines.

        Args:
            prompt (str): The prompt to feed into the language model.

        Returns:
            str: The generated text response.
        """
        context_query = self.model.generate_content(prompt, safety_settings=self.safety_settings, stream=False)
        return str(context_query.candidates[0].text).replace("```sql", "").replace("```", "").rstrip("\n")

    def rewrite_question(self, question, session_history):
        """Rewrite a follow-up question into a standalone one using session history.

        Args:
            question (str): The latest user question to rewrite.
            session_history (iterable): Prior turns; each item is a mapping
                with a 'user_question' key.

        Returns:
            tuple[str, str]: (all prior questions concatenated with trailing
            spaces, the LLM-rewritten standalone question).
        """
        formatted_history = ''
        concat_questions = ''
        for i, _row in enumerate(session_history, start=1):
            user_question = _row['user_question']
            formatted_history += f"User Question - Turn :: {i} : {user_question}\n"
            concat_questions += f"{user_question} "

        context_prompt = f"""
        Your main objective is to rewrite and refine the question based on the previous questions that has been asked.

        Refine the given question using the provided questions history to produce a standalone question with full context. The refined question should be self-contained, requiring no additional context for answering it.

        Make sure all the information is included in the re-written question. You just need to respond with the re-written question.

        Below is the previous questions history:

        {formatted_history}

        Question to rewrite:

        {question}
        """

        re_written_qe = str(self.generate_llm_response(context_prompt))
        print("*"*25 + "Re-written question:: " + "*"*25 + "\n" + str(re_written_qe))

        return str(concat_questions), str(re_written_qe)
================================================
FILE: app.py
================================================
import ast
import asyncio
import json

import pandas as pd
import streamlit as st
from streamlit.components.v1 import html
from streamlit.logger import get_logger

from opendataqna import generate_uuid, get_all_databases, run_pipeline, get_kgq
logger = get_logger(__name__)
# Initialize session state variables if they don't exist
if "session_id" not in st.session_state:
st.session_state.session_id = generate_uuid()
st.session_state.kgq = []
st.session_state.user_grouping = None
logger.info(f"New Session Created - {st.session_state.session_id}")
def get_known_databases():
    """Retrieves a list of available database schemas from the backend.

    Fetches the schema groupings exposed by the backend API; these represent
    the datasets users can query.

    Returns:
        list: A list of database schema names.
    """
    logger.info("Getting list of all user databases")
    raw_groupings, _ = get_all_databases()
    entries = json.loads(raw_groupings)
    groupings = []
    for entry in entries:
        if isinstance(entry, dict):
            groupings.append(entry["table_schema"])
    logger.info(f"user_groupings - {str(groupings)}")
    return groupings
def get_known_sql(selected_schema):
    """Retrieves known good SQL queries (KGQs) for a specific database schema.

    KGQs are pre-defined SQL queries that can be used as examples or
    suggestions for the selected schema.

    Args:
        selected_schema (str): The name of the database schema.

    Returns:
        pd.DataFrame: A DataFrame containing KGQs for the specified schema.
    """
    data = get_kgq(selected_schema)
    # SECURITY FIX: was `eval(data[0])`, which executes arbitrary Python from
    # backend data. ast.literal_eval only parses Python literals (lists/dicts/
    # strings/numbers), which is all this payload contains.
    parsed_data = list(ast.literal_eval(data[0]))
    return pd.DataFrame(parsed_data)
def generate_sql_results(selected_schema, user_question):
    """Generates SQL query, executes it, and returns results and response.

    Orchestrates the full pipeline: builds an SQL query for the user's
    question against the selected schema, executes it, and generates a
    natural-language answer from the results.

    Args:
        selected_schema (str): The name of the selected database schema.
        user_question (str): The user's natural language question.

    Returns:
        tuple: (generated SQL query (str), query results as a pandas
        DataFrame, generated natural language response (str)).
    """
    logger.info(f"generating response for user question - {user_question}")
    # FIX: log message previously misspelled "grouping" as "groouping".
    logger.info(f"selected user grouping - {selected_schema}")
    final_sql, results_df, response = asyncio.run(
        run_pipeline(
            st.session_state.session_id,
            user_question,
            selected_schema,
            RUN_DEBUGGER=True,
            EXECUTE_FINAL_SQL=True,
            DEBUGGING_ROUNDS=2,
            LLM_VALIDATION=False,
            Embedder_model='vertex',  # Options: 'vertex' or 'vertex-lang'
            SQLBuilder_model='gemini-1.5-pro',
            SQLChecker_model='gemini-1.5-pro',
            SQLDebugger_model='gemini-1.5-pro',
            Responder_model='gemini-1.5-pro',
            num_table_matches=5,
            num_column_matches=10,
            table_similarity_threshold=0.1,
            column_similarity_threshold=0.1,
            example_similarity_threshold=0.1,
            num_sql_matches=3
        )
    )
    return (final_sql, results_df, response)
def generate_response(prompt):
    """Generates and displays a response to the user's prompt.

    Replays the chat history, records the user's prompt, runs the SQL
    generation pipeline, and streams the SQL, natural-language answer, and
    result DataFrame back into the Streamlit chat.

    Args:
        prompt (str): The user's input prompt.
    """
    # Replay the conversation so far.
    for msg in st.session_state.messages:
        st.chat_message(msg["role"]).write(msg["content"])
    st.chat_message("user").write(prompt)
    # BUG FIX: the original appended the stale loop variable `msg` here (the
    # last replayed message dict) as an assistant turn; record the user's
    # prompt instead so history replays correctly on the next rerun.
    st.session_state.messages.append({"role": "user", "content": prompt})
    msg = "Generating Response"
    st.session_state.messages.append({"role": "assistant", "content": msg})
    st.chat_message("assistant").write(msg)
    query, results, response = generate_sql_results(st.session_state.user_grouping, prompt)
    # Show the generated SQL, then the natural-language answer.
    st.session_state.messages.append({"role": "assistant", "content": query})
    st.chat_message("assistant").write(query)
    st.session_state.messages.append({"role": "assistant", "content": response})
    st.chat_message("assistant").write(response)
    with st.chat_message("assistant"):
        st.dataframe(results)
        st.session_state.messages.append({"role": "assistant", "content": results})
# --- Module-level Streamlit page setup and main chat loop ---
st.set_page_config(page_title='Open Data QnA', page_icon="📊", initial_sidebar_state="expanded", layout='wide')

# Tighten the default page padding.
st.markdown("""
        <style>
               .block-container {
                    padding-top: 2rem;
                    padding-bottom: 0rem;
                    padding-left: 2rem;
                    padding-right: 2rem;
                }
        </style>
        """, unsafe_allow_html=True)

st.title("Open Data QnA")

with st.sidebar:
    # Let the user pick which table grouping (schema) to query.
    st.session_state.user_grouping = st.selectbox(
        'Select Table Groupings',
        get_known_databases())

    if st.button("New Query"):
        # Start a fresh session: new session id and cleared chat history.
        # NOTE(review): assumes `messages` already exists in session_state
        # (set on the first full run below) — confirm for first-render clicks.
        st.session_state.session_id = generate_uuid()
        st.session_state.messages.clear()
        st.rerun()

# Seed the chat with a greeting on the first run of a session.
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "content": "Frequently Asked Questions"}]

if st.session_state.user_grouping is not None:
    # Load known good questions for the selected grouping.
    df = get_known_sql(st.session_state.user_grouping)
    for index, row in df.iterrows():
        # NOTE(review): kgq is appended on every rerun, so entries can
        # duplicate across reruns — confirm this is intended.
        url = text = row["example_user_question"]
        st.session_state.kgq.append(text)

# Main chat input: generate a full answer for each submitted prompt.
if prompt := st.chat_input():
    generate_response(prompt)
================================================
FILE: backend-apis/README.md
================================================
<h3 style="text-align:center;"> Create Endpoints </h3>
Here we are going to create publicly accessible endpoints (no authentication).
If you're working on a managed GCP project, it is common that there would be Domain Restricted Sharing Org Policies that will not allow the creation of a public facing endpoint.
To work around this, we temporarily allow all domains, and afterwards re-enable the original policy so the project's existing policy configuration is left unchanged.
Please run the below command before proceeding ahead. You need to have Organization Policy Admin rights to run the below commands.
```
export PROJECT_ID=<PROJECT_ID>
```
```
cd Open_Data_QnA/backend-apis
gcloud resource-manager org-policies set-policy --project=$PROJECT_ID policy.yaml #This command will create policy that overrides to allow all domain
```
Create the service account and add roles to run the solution backend for the APIs
```
gcloud iam service-accounts create opendataqna --project=$PROJECT_ID
gcloud projects add-iam-policy-binding $PROJECT_ID --member=serviceAccount:opendataqna@$PROJECT_ID.iam.gserviceaccount.com --role='roles/cloudsql.client' --project=$PROJECT_ID --quiet
gcloud projects add-iam-policy-binding $PROJECT_ID --member=serviceAccount:opendataqna@$PROJECT_ID.iam.gserviceaccount.com --role='roles/bigquery.admin' --project=$PROJECT_ID --quiet
gcloud projects add-iam-policy-binding $PROJECT_ID --member=serviceAccount:opendataqna@$PROJECT_ID.iam.gserviceaccount.com --role='roles/aiplatform.user' --project=$PROJECT_ID --quiet
gcloud projects add-iam-policy-binding $PROJECT_ID --member=serviceAccount:opendataqna@$PROJECT_ID.iam.gserviceaccount.com --role='roles/datastore.owner' --project=$PROJECT_ID --quiet
```
**Technologies**
* **Programming language:** Python
* **Framework:** Flask
**Before you start :** Ensure all variables in your config.ini file are correct, especially those for your Postgres instance and BigQuery dataset. If you need to change the Postgres instance or BigQuery dataset values, update the config.ini file before proceeding.
The endpoints deployed here are completely customized for the UI built in this demo solution. Feel free to customize the endpoints if needed for a different UI/frontend. The `gcloud run deploy` command creates a Cloud Build job that uses the Dockerfile in the Open_Data_QnA folder.
***Deploy endpoints to Cloud Run***
```
export PROJECT_ID=<Enter your Project ID>
```
```
export SERVICE_NAME=opendataqna #change the name if needed
export DEPLOY_REGION=us-central1 #change the cloud run deployment region if needed
```
Enable the cloud build API to deploy the endpoints
```
gcloud services enable cloudbuild.googleapis.com --project $PROJECT_ID
```
Get default service account for compute engine and cloud build to deploy the cloud run and add IAM Roles for deployment
```
export PROJECT_NUMBER=$(gcloud projects describe $PROJECT_ID --format="value(projectNumber)")
export DEFAULT_CE_SA=$(gcloud iam service-accounts list --project=$PROJECT_ID --format="value(EMAIL)" --filter="EMAIL ~ $PROJECT_NUMBER-compute@developer.gserviceaccount.com")
gcloud projects add-iam-policy-binding $PROJECT_ID --member=serviceAccount:$DEFAULT_CE_SA --role='roles/storage.admin' --project=$PROJECT_ID --quiet
gcloud projects add-iam-policy-binding $PROJECT_ID --member=serviceAccount:$DEFAULT_CE_SA --role='roles/artifactregistry.admin' --project=$PROJECT_ID --quiet
gcloud projects add-iam-policy-binding $PROJECT_ID --member=serviceAccount:$DEFAULT_CE_SA --role='roles/firebase.admin' --project=$PROJECT_ID --quiet
gcloud projects add-iam-policy-binding $PROJECT_ID --member=serviceAccount:$DEFAULT_CE_SA --role='roles/cloudbuild.builds.builder' --project=$PROJECT_ID --quiet
gcloud projects add-iam-policy-binding $PROJECT_ID --member=serviceAccount:$DEFAULT_CE_SA --role='roles/logging.logWriter' --project=$PROJECT_ID --quiet
export DEFAULT_CB_SA=$PROJECT_NUMBER'@cloudbuild.gserviceaccount.com'
gcloud projects add-iam-policy-binding $PROJECT_ID --member=serviceAccount:$DEFAULT_CB_SA --role='roles/firebase.admin' --project=$PROJECT_ID --quiet
gcloud projects add-iam-policy-binding $PROJECT_ID --member=serviceAccount:$DEFAULT_CB_SA --role='roles/serviceusage.apiKeysAdmin' --project=$PROJECT_ID --quiet
gcloud projects add-iam-policy-binding $PROJECT_ID --member=serviceAccount:$DEFAULT_CB_SA --role='roles/cloudbuild.builds.builder' --project=$PROJECT_ID --quiet
gcloud projects add-iam-policy-binding $PROJECT_ID --member=serviceAccount:$DEFAULT_CB_SA --role='roles/artifactregistry.admin' --project=$PROJECT_ID --quiet
```
```
cd Open_Data_QnA
gcloud beta run deploy $SERVICE_NAME --region $DEPLOY_REGION --source . --service-account=opendataqna@$PROJECT_ID.iam.gserviceaccount.com --service-min-instances=1 --allow-unauthenticated --project=$PROJECT_ID
#if you are deploying cloud run application for the first time in the project you will be prompted for a couple of settings. Go ahead and type Yes.
```
Once the deployment is done successfully you should be able to see the Service URL (endpoint) link as shown below. Please keep this handy to add to the frontend, or you can get this URI from the Cloud Run page in the GCP Console. e.g. *https://OpenDataQnA-aeiouAEI-uc.a.run.app*
Test if the endpoints are working with the command below. This should return the dataset you created in the source environment setup notebook.
```
curl <URI of the end point>/available_databases
```
<p align="center">
<a href="../utilities/imgs/Cloud Run Deploy.png">
<img src="../utilities/imgs/Cloud Run Deploy.png" alt="aaie image">
</a>
</p>
Delete the Org Policy on the Project that's created above. Do not run this if you haven’t created the org policy above
```
gcloud resource-manager org-policies delete iam.allowedPolicyMemberDomains --project=$PROJECT_ID
```
**API Details**
All the payloads are in JSON format
1. List Databases : Get the available databases in the vector store that solution can run against
URI: {Service URL}/available_databases
Complete URL Sample : https://OpenDataQnA-aeiouAEI-uc.a.run.app/available_databases
Method: GET
Request Payload : NONE
Request response:
```
{
"Error": "",
"KnownDB": "[{\"table_schema\":\"imdb-postgres\"},{\"table_schema\":\"retail-postgres\"}]",
"ResponseCode": 200
}
```
2. Known SQL : Get suggestive questions (previously asked/examples added) for selected database
URI: /get_known_sql
Complete URL Sample : https://OpenDataQnA-aeiouAEI-uc.a.run.app/get_known_sql
Method: POST
Request Payload :
```
{
"user_grouping":"retail"
}
```
Request response:
```
{
"Error": "",
"KnownSQL": "[{\"example_user_question\":\"Which city had maximum number of sales and what was the count?\",\"example_generated_sql\":\"select st.city_id, count(st.city_id) as city_sales_count from retail.sales as s join retail.stores as st on s.id_store = st.id_store group by st.city_id order by city_sales_count desc limit 1;\"}]",
"ResponseCode": 200
}
```
3. SQL Generation : Generate the SQL for the input question asked against a database
URI: /generate_sql
Method: POST
Complete URL Sample : https://OpenDataQnA-aeiouAEI-uc.a.run.app/generate_sql
Request payload:
```
{
"session_id":"",
"user_id":"harry@hogwarts.com",
"user_question":"Which city had maximum number of sales?",
"user_grouping":"retail"
}
```
Request response:
```
{
"Error": "",
"GeneratedSQL": " select st.city_id from retail.sales as s join retail.stores as st on s.id_store = st.id_store group by st.city_id order by count(*) desc limit 1;",
"ResponseCode": 200,
"SessionID":"1iuu2u-k1ij2-kkkhhj12131"
}
```
4. Execute SQL : Run the SQL statement against provided database source
URI:/run_query
Complete URL Sample : https://OpenDataQnA-aeiouAEI-uc.a.run.app/run_query
Method: POST
Request payload:
```
{ "user_grouping": "retail",
"generated_sql":"select st.city_id from retail.sales as s join retail.stores as st on s.id_store = st.id_store group by st.city_id order by count(*) desc limit 1;",
"session_id":"1iuu2u-k1ij2-kkkhhj12131"
}
```
Request response:
```
{
"SessionID":"1iuu2u-k1ij2-kkkhhj12131",
"Error": "",
"KnownDB": "[{\"city_id\":\"C014\"}]",
"ResponseCode": 200
}
```
5. Embed SQL : To embed known good SQLs to your example embeddings
URI:/embed_sql
Complete URL Sample : https://OpenDataQnA-aeiouAEI-uc.a.run.app/embed_sql
METHOD: POST
Request Payload:
```
{
"session_id":"1iuu2u-k1ij2-kkkhhj12131",
"user_question":"Which city had maximum number of sales?",
"generated_sql":"select st.city_id from retail.sales as s join retail.stores as st on s.id_store = st.id_store group by st.city_id order by count(*) desc limit 1;",
"user_grouping":"retail"
}
```
Request response:
```
{
"ResponseCode" : 201,
"Message" : "Example SQL has been accepted for embedding",
"Error":"",
"SessionID":"1iuu2u-k1ij2-kkkhhj12131"
}
```
6. Generate Visualization Code : To generate JavaScript Google Charts code based on the SQL results and display them on the UI
As per design we have two suggested visualizations showing up when the user clicks the visualize button. Hence two divs are sent as part of the response — "chart_div" and "chart_div_1" — to bind them to those elements in the UI.
If you are only looking to set up the endpoints you can stop here. In case you require the demo app (frontend UI) built in the solution, proceed to the next step.
URI:/generate_viz
Complete URL Sample : https://OpenDataQnA-aeiouAEI-uc.a.run.app/generate_viz
METHOD: POST
Request Payload:
```
{
"session_id":"1iuu2u-k1ij2-kkkhhj12131" ,
"user_question": "What are top 5 product skus that are ordered?",
"sql_generated": "SELECT productSKU as ProductSKUCode, sum(total_ordered) as TotalOrderedItems FROM `inbq1-joonix.demo.sales_sku` group by productSKU order by sum(total_ordered) desc limit 5",
"sql_results": [
{
"ProductSKUCode": "GGOEGOAQ012899",
"TotalOrderedItems": 456
},
{
"ProductSKUCode": "GGOEGDHC074099",
"TotalOrderedItems": 334
},
{
"ProductSKUCode": "GGOEGOCB017499",
"TotalOrderedItems": 319
},
{
"ProductSKUCode": "GGOEGOCC077999",
"TotalOrderedItems": 290
},
{
"ProductSKUCode": "GGOEGFYQ016599",
"TotalOrderedItems": 253
}
]
}
```
Request response:
```
{
"SessionID":"1iuu2u-k1ij2-kkkhhj12131",
"Error": "",
"GeneratedChartjs": {
"chart_div": "google.charts.load('current', {\n packages: ['corechart']\n});\ngoogle.charts.setOnLoadCallback(drawChart);\n\nfunction drawChart() {\n var data = google.visualization.arrayToDataTable([\n ['Product SKU', 'Total Ordered Items'],\n ['GGOEGOAQ012899', 456],\n ['GGOEGDHC074099', 334],\n ['GGOEGOCB017499', 319],\n ['GGOEGOCC077999', 290],\n ['GGOEGFYQ016599', 253],\n ]);\n\n var options = {\n title: 'Top 5 Product SKUs Ordered',\n width: 600,\n height: 300,\n hAxis: {\n textStyle: {\n fontSize: 12\n }\n },\n vAxis: {\n textStyle: {\n fontSize: 12\n }\n },\n legend: {\n textStyle: {\n fontSize: 12\n }\n },\n bar: {\n groupWidth: '50%'\n }\n };\n\n var chart = new google.visualization.BarChart(document.getElementById('chart_div'));\n\n chart.draw(data, options);\n}\n",
"chart_div_1": "google.charts.load('current', {'packages':['corechart']});\ngoogle.charts.setOnLoadCallback(drawChart);\nfunction drawChart() {\n var data = google.visualization.arrayToDataTable([\n ['ProductSKUCode', 'TotalOrderedItems'],\n ['GGOEGOAQ012899', 456],\n ['GGOEGDHC074099', 334],\n ['GGOEGOCB017499', 319],\n ['GGOEGOCC077999', 290],\n ['GGOEGFYQ016599', 253]\n ]);\n\n var options = {\n title: 'Top 5 Product SKUs that are Ordered',\n width: 600,\n height: 300,\n hAxis: {\n textStyle: {\n fontSize: 5\n }\n },\n vAxis: {\n textStyle: {\n fontSize: 5\n }\n },\n legend: {\n textStyle: {\n fontSize: 10\n }\n },\n bar: {\n groupWidth: \"60%\"\n }\n };\n\n var chart = new google.visualization.ColumnChart(document.getElementById('chart_div_1'));\n\n chart.draw(data, options);\n}\n"
},
"ResponseCode": 200
}
```
7. Get Results : To directly get the sql results in JSON format
URI:/get_results
Complete URL Sample : https://OpenDataQnA-aeiouAEI-uc.a.run.app/get_results
METHOD: POST
Request Payload:
```
{
"user_question":"Which city had maximum number of sales?",
"user_database":"retail"
}
```
Request response:
```
{
"Error": "",
"GeneratedResults": "[{\"city_id\":\"C014\"}]",
"ResponseCode": 200
}
```
### For setting up the demo UI with these endpoints please refer to README.md under [`/frontend`](/frontend/)
================================================
FILE: backend-apis/__init__.py
================================================
================================================
FILE: backend-apis/main.py
================================================
# -*- coding: utf-8 -*-
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from flask import Flask, request, jsonify, render_template, Response
import asyncio
from collections.abc import Callable
import logging as log
import json
import datetime
import urllib
import re
import time
import textwrap
import pandas as pd
from flask_cors import CORS
import os
import sys
import firebase_admin
from firebase_admin import credentials, auth
from functools import wraps
# Initialize Firebase Admin with application-default credentials; required by
# the jwt_authenticated decorator below to verify ID tokens.
firebase_admin.initialize_app()
from opendataqna import get_all_databases,get_kgq,generate_sql,embed_sql,get_response,get_results,visualize
# NOTE(review): this sys.path append happens AFTER the opendataqna import above,
# so it cannot help resolve that import — confirm whether it should precede it.
module_path = os.path.abspath(os.path.join('.'))
sys.path.append(module_path)
def jwt_authenticated(func: Callable[..., int]) -> Callable[..., int]:
    """Decorator enforcing Firebase ID-token authentication on a Flask route.

    Expects an ``Authorization: Bearer <token>`` header, verifies the token via
    the Firebase Admin SDK and stores the caller's uid on ``request.uid``.
    Responds 401 when the header is missing or malformed and 403 when token
    verification fails. Supports both sync and async view functions.
    """
    @wraps(func)
    async def decorated_function(*args, **kwargs):
        header = request.headers.get("Authorization", None)
        if not header:
            return Response(status=401)
        parts = header.split(" ")
        if len(parts) != 2:
            # BUGFIX: previously a header without a space raised IndexError;
            # treat a malformed Authorization header as unauthenticated.
            return Response(status=401)
        token = parts[1]
        try:
            # SECURITY FIX: do not print the raw token — it is a bearer
            # credential and must not be written to logs.
            decoded_token = firebase_admin.auth.verify_id_token(token)
        except Exception as e:
            log.exception(e)
            return Response(status=403, response=f"Error with authentication: {e}")
        request.uid = decoded_token["uid"]
        print("USER:: "+str(request.uid))
        return await func(*args, **kwargs) if asyncio.iscoroutinefunction(func) else func(*args, **kwargs)
    return decorated_function
# --- Pipeline behaviour flags (passed into generate_sql) ---
RUN_DEBUGGER = True          # run the SQL debugger agent on generated SQL
DEBUGGING_ROUNDS = 2         # maximum debugger iterations
LLM_VALIDATION = False       # extra LLM-based validation pass
EXECUTE_FINAL_SQL = True
# --- Model selection ---
Embedder_model = 'vertex'
SQLBuilder_model = 'gemini-1.5-pro'
SQLChecker_model = 'gemini-1.5-pro'
SQLDebugger_model = 'gemini-1.5-pro'
# --- Similarity-search tuning for schema/example retrieval ---
num_table_matches = 5
num_column_matches = 10
table_similarity_threshold = 0.3
column_similarity_threshold = 0.3
example_similarity_threshold = 0.3
num_sql_matches = 3
app = Flask(__name__)
# Allow cross-origin calls from any origin (the frontend is served separately).
cors = CORS(app, resources={r"/*": {"origins": "*"}})
@app.route("/available_databases", methods=["GET"])
# @jwt_authenticated
def getBDList():
    """List the database groupings available in the vector store."""
    result, invalid_response = get_all_databases()
    if invalid_response:
        responseDict = {
            "ResponseCode": 500,
            "KnownDB": "",
            "Error": result
        }
    else:
        responseDict = {
            "ResponseCode": 200,
            "KnownDB": result,
            "Error": ""
        }
    return jsonify(responseDict)
@app.route("/embed_sql", methods=["POST"])
# @jwt_authenticated
async def embedSql():
    """Accept a known-good question/SQL pair and submit it for embedding."""
    payload = json.loads(request.data.decode('utf-8'))
    user_grouping = payload.get('user_grouping')
    generated_sql = payload.get('generated_sql')
    user_question = payload.get('user_question')
    session_id = payload.get('session_id')
    embedded, invalid_response = await embed_sql(session_id, user_grouping, user_question, generated_sql)
    if invalid_response:
        responseDict = {
            "ResponseCode": 500,
            "KnownDB": "",
            "SessionID": session_id,
            "Error": embedded
        }
    else:
        responseDict = {
            "ResponseCode": 201,
            "Message": "Example SQL has been accepted for embedding",
            "SessionID": session_id,
            "Error": ""
        }
    return jsonify(responseDict)
@app.route("/run_query", methods=["POST"])
# @jwt_authenticated
def getSQLResult():
    """Execute a generated SQL statement and return the rows plus a
    natural-language summary of the results.

    Request JSON: user_question, user_grouping, generated_sql, session_id.
    """
    envelope = json.loads(request.data.decode('utf-8'))
    user_question = envelope.get('user_question')
    user_grouping = envelope.get('user_grouping')
    generated_sql = envelope.get('generated_sql')
    session_id = envelope.get('session_id')
    result_df, invalid_response = get_results(user_grouping, generated_sql)
    if invalid_response:
        # On failure result_df carries the error message, not a DataFrame.
        responseDict = {
            "ResponseCode": 500,
            "KnownDB": "",
            "NaturalResponse": result_df,
            "SessionID": session_id,
            "Error": result_df
        }
        return jsonify(responseDict)
    rows_json = result_df.to_json(orient='records')
    _resp, invalid_response = get_response(session_id, user_question, rows_json)
    if invalid_response:
        # BUGFIX: this 500 path previously returned an empty Error string,
        # hiding the failure reason from the caller.
        responseDict = {
            "ResponseCode": 500,
            "KnownDB": rows_json,
            "NaturalResponse": _resp,
            "SessionID": session_id,
            "Error": _resp
        }
    else:
        responseDict = {
            "ResponseCode": 200,
            "KnownDB": rows_json,
            "NaturalResponse": _resp,
            "SessionID": session_id,
            "Error": ""
        }
    return jsonify(responseDict)
@app.route("/get_known_sql", methods=["POST"])
# @jwt_authenticated
def getKnownSQL():
    """Return example question/SQL pairs stored for the requested grouping."""
    print("Extracting the known SQLs from the example embeddings.")
    body = json.loads(request.data.decode('utf-8'))
    result, invalid_response = get_kgq(body.get('user_grouping'))
    if invalid_response:
        responseDict = {
            "ResponseCode": 500,
            "KnownSQL": "",
            "Error": result
        }
    else:
        responseDict = {
            "ResponseCode": 200,
            "KnownSQL": result,
            "Error": ""
        }
    return jsonify(responseDict)
@app.route("/generate_sql", methods=["POST"])
# @jwt_authenticated
async def generateSQL():
    """Generate SQL for a natural-language question against a grouping."""
    print("Here is the request payload ")
    raw = str(request.data.decode('utf-8'))
    print("Here is the request payload " + raw)
    body = json.loads(raw)
    user_question = body.get('user_question')
    user_grouping = body.get('user_grouping')
    session_id = body.get('session_id')
    user_id = body.get('user_id')
    generated_sql, session_id, invalid_response = await generate_sql(
        session_id,
        user_question,
        user_grouping,
        RUN_DEBUGGER,
        DEBUGGING_ROUNDS,
        LLM_VALIDATION,
        Embedder_model,
        SQLBuilder_model,
        SQLChecker_model,
        SQLDebugger_model,
        num_table_matches,
        num_column_matches,
        table_similarity_threshold,
        column_similarity_threshold,
        example_similarity_threshold,
        num_sql_matches,
        user_id=user_id,
    )
    if invalid_response:
        responseDict = {
            "ResponseCode": 500,
            "GeneratedSQL": "",
            "SessionID": session_id,
            "Error": generated_sql
        }
    else:
        responseDict = {
            "ResponseCode": 200,
            "GeneratedSQL": generated_sql,
            "SessionID": session_id,
            "Error": ""
        }
    return jsonify(responseDict)
@app.route("/generate_viz", methods=["POST"])
# @jwt_authenticated
async def generateViz():
    """Generate Google Charts JS for SQL results.

    Returns two scripts keyed "chart_div" and "chart_div_1" so the UI can
    bind each to its corresponding element.
    """
    envelope = json.loads(str(request.data.decode('utf-8')))
    user_question = envelope.get('user_question')
    # BUGFIX: the public API documents this field as "sql_generated" while the
    # code only read "generated_sql". Accept both for backward compatibility.
    generated_sql = envelope.get('generated_sql') or envelope.get('sql_generated')
    sql_results = envelope.get('sql_results')
    session_id = envelope.get('session_id')
    chart_js = ''
    try:
        chart_js, invalid_response = visualize(session_id, user_question, generated_sql, sql_results)
        if not invalid_response:
            responseDict = {
                "ResponseCode": 200,
                "GeneratedChartjs": chart_js,
                "Error": "",
                "SessionID": session_id
            }
        else:
            # chart_js carries the error message on failure.
            responseDict = {
                "ResponseCode": 500,
                "GeneratedSQL": "",
                "SessionID": session_id,
                "Error": chart_js
            }
        return jsonify(responseDict)
    except Exception as e:
        responseDict = {
            "ResponseCode": 500,
            "GeneratedSQL": "",
            "SessionID": session_id,
            "Error": "Issue was encountered while generating the Google Chart, please check the logs!" + str(e)
        }
        return jsonify(responseDict)
@app.route("/summarize_results", methods=["POST"])
# @jwt_authenticated
async def getSummary():
    """Summarize SQL results into a natural-language answer."""
    envelope = json.loads(str(request.data.decode('utf-8')))
    user_question = envelope.get('user_question')
    sql_results = envelope.get('sql_results')
    session_id = envelope.get('session_id')
    # BUGFIX: get_response is called elsewhere in this file as
    # get_response(session_id, user_question, results) — the previous
    # two-argument call here misbound the arguments.
    result, invalid_response = get_response(session_id, user_question, sql_results)
    if not invalid_response:
        responseDict = {
            "ResponseCode": 200,
            "summary_response": result,
            "Error": ""
        }
    else:
        responseDict = {
            "ResponseCode": 500,
            "summary_response": "",
            "Error": result
        }
    return jsonify(responseDict)
@app.route("/natural_response", methods=["POST"])
# @jwt_authenticated
async def getNaturalResponse():
    """One-shot pipeline: generate SQL, execute it, and summarize the results."""
    envelope = json.loads(str(request.data.decode('utf-8')))
    user_question = envelope.get('user_question')
    user_grouping = envelope.get('user_grouping')
    session_id = envelope.get('session_id')
    # BUGFIX: generate_sql takes the session id as its first positional
    # argument (see /generate_sql above); omitting it shifted every
    # subsequent argument by one position.
    generated_sql, session_id, invalid_response = await generate_sql(session_id,
                                                                     user_question,
                                                                     user_grouping,
                                                                     RUN_DEBUGGER,
                                                                     DEBUGGING_ROUNDS,
                                                                     LLM_VALIDATION,
                                                                     Embedder_model,
                                                                     SQLBuilder_model,
                                                                     SQLChecker_model,
                                                                     SQLDebugger_model,
                                                                     num_table_matches,
                                                                     num_column_matches,
                                                                     table_similarity_threshold,
                                                                     column_similarity_threshold,
                                                                     example_similarity_threshold,
                                                                     num_sql_matches)
    if not invalid_response:
        result_df, invalid_response = get_results(user_grouping, generated_sql)
        if not invalid_response:
            # BUGFIX: pass session_id, matching the 3-argument get_response
            # call used in /run_query.
            result, invalid_response = get_response(session_id, user_question, result_df.to_json(orient='records'))
            if not invalid_response:
                responseDict = {
                    "ResponseCode": 200,
                    "summary_response": result,
                    "Error": ""
                }
            else:
                responseDict = {
                    "ResponseCode": 500,
                    "summary_response": "",
                    "Error": result
                }
        else:
            responseDict = {
                "ResponseCode": 500,
                "KnownDB": "",
                "Error": result_df
            }
    else:
        responseDict = {
            "ResponseCode": 500,
            "GeneratedSQL": "",
            "Error": generated_sql
        }
    return jsonify(responseDict)
@app.route("/get_results", methods=["POST"])
async def getResultsResponse():
    """Generate SQL for a question and return the execution results as JSON."""
    envelope = json.loads(str(request.data.decode('utf-8')))
    user_question = envelope.get('user_question')
    user_database = envelope.get('user_database')
    session_id = envelope.get('session_id')
    # BUGFIX: generate_sql takes the session id first and returns THREE values
    # (sql, session_id, invalid_response) — see /generate_sql above. The old
    # code omitted session_id and unpacked only two values.
    generated_sql, session_id, invalid_response = await generate_sql(session_id,
                                                                     user_question,
                                                                     user_database,
                                                                     RUN_DEBUGGER,
                                                                     DEBUGGING_ROUNDS,
                                                                     LLM_VALIDATION,
                                                                     Embedder_model,
                                                                     SQLBuilder_model,
                                                                     SQLChecker_model,
                                                                     SQLDebugger_model,
                                                                     num_table_matches,
                                                                     num_column_matches,
                                                                     table_similarity_threshold,
                                                                     column_similarity_threshold,
                                                                     example_similarity_threshold,
                                                                     num_sql_matches)
    if not invalid_response:
        result_df, invalid_response = get_results(user_database, generated_sql)
        if not invalid_response:
            responseDict = {
                "ResponseCode": 200,
                "GeneratedResults": result_df.to_json(orient='records'),
                "Error": ""
            }
        else:
            responseDict = {
                "ResponseCode": 500,
                "GeneratedResults": "",
                "Error": result_df
            }
    else:
        responseDict = {
            "ResponseCode": 500,
            "GeneratedResults": "",
            "Error": generated_sql
        }
    return jsonify(responseDict)
if __name__ == "__main__":
    # Local development entry point; Cloud Run supplies PORT. NOTE(review):
    # debug=True is for local use only — confirm it is disabled in production.
    app.run(debug=True, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)))
================================================
FILE: backend-apis/policy.yaml
================================================
# Org-policy override that allows all member domains in IAM policies —
# needed so the Cloud Run service can be made publicly invokable.
constraint: constraints/iam.allowedPolicyMemberDomains
listPolicy:
  allValues: ALLOW
================================================
FILE: config.ini
================================================
[CONFIG]
# Core solution settings: models, vector-store backend and feature toggles.
embedding_model = vertex
description_model = gemini-1.5-pro
vector_store = bigquery-vector
debugging = yes
logging = yes
kgq_examples = yes
firestore_region = us-central1
use_session_history = yes
use_column_samples = no
[GCP]
project_id = three-p-o
[PGCLOUDSQL]
# Cloud SQL for PostgreSQL connection settings.
# NOTE(review): pg_password is stored in plaintext here — consider moving it
# to Secret Manager or an environment variable.
pg_region = us-central1
pg_instance = pg15-opendataqna
pg_database = opendataqna-db
pg_user = pguser
pg_password = pg123
[BIGQUERY]
# Dataset/region used for embeddings and the audit log table.
bq_dataset_region = us-central1
bq_opendataqna_dataset_name = opendataqna
bq_log_table_name = audit_log_table
================================================
FILE: dbconnectors/BQConnector.py
================================================
"""
BigQuery Connector Class
"""
from google.cloud import bigquery
from google.cloud import bigquery_connection_v1 as bq_connection
from dbconnectors import DBConnector
from abc import ABC
from datetime import datetime
import google.auth
import pandas as pd
from google.cloud.exceptions import NotFound
def get_auth_user():
    """Return the active service-account email from application-default
    credentials, or "Not Determined" when the credentials expose none."""
    credentials, _ = google.auth.default()
    return getattr(credentials, 'service_account_email', "Not Determined")
def bq_specific_data_types():
    """Return a static prompt snippet describing BigQuery's data types,
    intended to be injected into SQL-generation prompts."""
    return '''
    BigQuery offers a wide variety of datatypes to store different types of data effectively. Here's a breakdown of the available categories:
    Numeric Types -
    INTEGER (INT64): Stores whole numbers within the range of -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807. Ideal for non-fractional values.
    FLOAT (FLOAT64): Stores approximate floating-point numbers with a range of -1.7E+308 to 1.7E+308. Suitable for decimals with a degree of imprecision.
    NUMERIC: Stores exact fixed-precision decimal numbers, with up to 38 digits of precision and 9 digits to the right of the decimal point. Useful for precise financial and accounting calculations.
    BIGNUMERIC: Similar to NUMERIC but with even larger scale and precision. Designed for extreme precision in calculations.
    Character Types -
    STRING: Stores variable-length Unicode character sequences. Enclosed using single, double, or triple quotes.
    Boolean Type -
    BOOLEAN: Stores logical values of TRUE or FALSE (case-insensitive).
    Date and Time Types -
    DATE: Stores dates without associated time information.
    TIME: Stores time information independent of a specific date.
    DATETIME: Stores both date and time information (without timezone information).
    TIMESTAMP: Stores an exact moment in time with microsecond precision, including a timezone component for global accuracy.
    Other Types
    BYTES: Stores variable-length binary data. Distinguished from strings by using 'B' or 'b' prefix in values.
    GEOGRAPHY: Stores points, lines, and polygons representing locations on the Earth's surface.
    ARRAY: Stores an ordered collection of zero or more elements of the same (non-ARRAY) data type.
    STRUCT: Stores an ordered collection of fields, each with its own name and data type (can be nested).
    This list covers the most common datatypes in BigQuery.
    '''
class BQConnector(DBConnector, ABC):
"""
A connector class for interacting with BigQuery databases.
This class provides methods for connecting to BigQuery, executing queries, retrieving results as DataFrames, logging interactions, and managing embeddings.
Attributes:
project_id (str): The Google Cloud project ID where the BigQuery dataset resides.
region (str): The region where the BigQuery dataset is located.
dataset_name (str): The name of the BigQuery dataset to interact with.
opendataqna_dataset (str): Name of the dataset to use for OpenDataQnA functionalities.
audit_log_table_name (str): Name of the table to store audit logs.
client (bigquery.Client): The BigQuery client instance for executing queries.
Methods:
getconn() -> bigquery.Client:
Establishes a connection to BigQuery and returns a client object.
retrieve_df(query) -> pd.DataFrame:
Executes a SQL query and returns the results as a pandas DataFrame.
make_audit_entry(source_type, user_grouping, model, question, generated_sql, found_in_vector, need_rewrite, failure_step, error_msg, FULL_LOG_TEXT) -> str:
Logs an audit entry to BigQuery, recording details of the interaction and the generated SQL query.
create_vertex_connection(connection_id) -> None:
Creates a Vertex AI connection for remote model usage in BigQuery.
create_embedding_model(connection_id, embedding_model) -> None:
Creates or replaces an embedding model in BigQuery using a Vertex AI connection.
retrieve_matches(mode, user_grouping, qe, similarity_threshold, limit) -> list:
Retrieves the most similar table schemas, column schemas, or example queries based on the given mode and parameters.
getSimilarMatches(mode, user_grouping, qe, num_matches, similarity_threshold) -> str:
Returns a formatted string containing similar matches found for tables, columns, or examples.
getExactMatches(query) -> str or None:
Checks if the exact question is present in the example SQL set and returns the corresponding SQL query if found.
test_sql_plan_execution(generated_sql) -> Tuple[bool, str]:
Tests the execution plan of a generated SQL query in BigQuery. Returns a tuple indicating success and a message.
return_table_schema_sql(dataset, table_names=None) -> str:
Returns a SQL query to retrieve table schema information from a BigQuery dataset.
return_column_schema_sql(dataset, table_names=None) -> str:
Returns a SQL query to retrieve column schema information from a BigQuery dataset.
"""
    def __init__(self,
                 project_id:str,
                 region:str,
                 opendataqna_dataset:str,
                 audit_log_table_name:str):
        """Initialize the connector and eagerly create a BigQuery client.

        Args:
            project_id: Google Cloud project hosting the datasets.
            region: location of the BigQuery dataset/connections.
            opendataqna_dataset: dataset holding embeddings and logs.
            audit_log_table_name: table within the dataset for audit entries.
        """
        self.project_id = project_id
        self.region = region
        self.opendataqna_dataset = opendataqna_dataset
        self.audit_log_table_name = audit_log_table_name
        # Client is created at construction time, so credential/project
        # problems surface immediately rather than on the first query.
        self.client=self.getconn()
def getconn(self):
client = bigquery.Client(project=self.project_id)
return client
def retrieve_df(self,query):
return self.client.query_and_wait(query).to_dataframe()
    def make_audit_entry(self, source_type, user_grouping, model, question, generated_sql, found_in_vector, need_rewrite, failure_step, error_msg, FULL_LOG_TEXT):
        """Write one audit-log row for a question/SQL interaction to BigQuery.

        Builds a single-row DataFrame from the arguments and writes it to
        `<project>.<opendataqna_dataset>.<audit_log_table_name>`. If the table
        already exists the row is streamed in; otherwise a load job with an
        explicit schema creates the table.

        Args:
            source_type: caller-supplied label for the request origin.
            user_grouping: database/grouping the question ran against.
            model: model name used for SQL generation.
            question: the user's natural-language question.
            generated_sql: the SQL produced for the question.
            found_in_vector: whether an exact example match was found.
            need_rewrite: whether the SQL required a rewrite/debug pass.
            failure_step: pipeline step at which processing failed, if any.
            error_msg: associated error message, if any.
            FULL_LOG_TEXT: full free-text log of the interaction.

        Returns:
            A short status string once the logging attempt completes.
        """
        # global FULL_LOG_TEXT
        auth_user=get_auth_user()
        PROJECT_ID = self.project_id
        # Fully-qualified table id: <project>.<dataset>.<table>.
        table_id= PROJECT_ID+ '.' + self.opendataqna_dataset + '.' + self.audit_log_table_name
        now = datetime.now()
        table_exists=False
        client = self.getconn()
        # One-row DataFrame mirroring the audit table's column layout.
        df1 = pd.DataFrame(columns=[
            'source_type',
            'project_id',
            'user',
            'user_grouping',
            'model_used',
            'question',
            'generated_sql',
            'found_in_vector',
            'need_rewrite',
            'failure_step',
            'error_msg',
            'execution_time',
            'full_log'
        ])
        new_row = {
            "source_type":source_type,
            "project_id":str(PROJECT_ID),
            "user":str(auth_user),
            "user_grouping": user_grouping,
            "model_used": model,
            "question": question,
            "generated_sql": generated_sql,
            "found_in_vector":found_in_vector,
            "need_rewrite":need_rewrite,
            "failure_step":failure_step,
            "error_msg":error_msg,
            "execution_time": now,
            "full_log": FULL_LOG_TEXT
        }
        df1.loc[len(df1)] = new_row
        db_schema=[
            # Specify the type of columns whose type cannot be auto-detected. For
            # example the "title" column uses pandas dtype "object", so its
            # data type is ambiguous.
            bigquery.SchemaField("source_type", bigquery.enums.SqlTypeNames.STRING),
            bigquery.SchemaField("project_id", bigquery.enums.SqlTypeNames.STRING),
            bigquery.SchemaField("user", bigquery.enums.SqlTypeNames.STRING),
            bigquery.SchemaField("user_grouping", bigquery.enums.SqlTypeNames.STRING),
            bigquery.SchemaField("model_used", bigquery.enums.SqlTypeNames.STRING),
            bigquery.SchemaField("question", bigquery.enums.SqlTypeNames.STRING),
            bigquery.SchemaField("generated_sql", bigquery.enums.SqlTypeNames.STRING),
            bigquery.SchemaField("found_in_vector", bigquery.enums.SqlTypeNames.STRING),
            bigquery.SchemaField("need_rewrite", bigquery.enums.SqlTypeNames.STRING),
            bigquery.SchemaField("failure_step", bigquery.enums.SqlTypeNames.STRING),
            bigquery.SchemaField("error_msg", bigquery.enums.SqlTypeNames.STRING),
            bigquery.SchemaField("execution_time", bigquery.enums.SqlTypeNames.TIMESTAMP),
            bigquery.SchemaField("full_log", bigquery.enums.SqlTypeNames.STRING),
        ]
        try:
            client.get_table(table_id)  # Make an API request.
            # print("Table {} already exists.".format(table_id))
            table_exists=True
        except NotFound:
            print("Table {} is not found. Will create this log table".format(table_id))
            table_exists=False
        if table_exists is True:
            # print('Performing streaming insert')
            # Streaming insert is only valid once the table exists.
            errors = client.insert_rows_from_dataframe(table=table_id, dataframe=df1, selected_fields=db_schema)  # Make an API request.
            # NOTE(review): `[[]]` is the no-errors shape for a single chunk;
            # multi-chunk inserts would need `not any(errors)` — confirm.
            if errors == [[]]:
                print("Logged the run")
            else:
                print("Encountered errors while inserting rows: {}".format(errors))
        else:
            # First run: create the table via a load job with the explicit schema.
            job_config = bigquery.LoadJobConfig(schema=db_schema,write_disposition="WRITE_TRUNCATE")
            # pandas_gbq.to_gbq(df1, table_id, project_id=PROJECT_ID) # replace to replace table; append to append to a table
            client.load_table_from_dataframe(df1,table_id,job_config=job_config)  # replace to replace table; append to append to a table
        # df1.loc[len(df1)] = new_row
        # pandas_gbq.to_gbq(df1, table_id, project_id=PROJECT_ID, if_exists='append') # replace to replace table; append to append to a table
        # print('\n Query added to BQ log table \n')
        return 'Completed the logging step'
    def create_vertex_connection(self, connection_id : str):
        """Create a BigQuery Cloud-resource connection (used for Vertex AI
        remote models) in this connector's project and region.

        Args:
            connection_id: identifier for the new connection.
        """
        client=bq_connection.ConnectionServiceClient()
        cloud_resource_properties = bq_connection.types.CloudResourceProperties()
        new_connection=bq_connection.Connection(cloud_resource=cloud_resource_properties)
        # The response is unused; the call raises on failure.
        response= client.create_connection(parent=f'projects/{self.project_id}/locations/{self.region}',connection=new_connection,connection_id=connection_id)
    def create_embedding_model(self,connection_id: str, embedding_model: str):
        """Create or replace the remote EMBEDDING_MODEL in the opendataqna
        dataset, backed by the given Vertex AI connection and endpoint.

        Args:
            connection_id: id of an existing BigQuery Cloud-resource connection.
            embedding_model: Vertex AI endpoint name for the remote model.
        """
        client = self.getconn()
        client.query_and_wait(f'''CREATE OR REPLACE MODEL `{self.project_id}.{self.opendataqna_dataset}.EMBEDDING_MODEL`
            REMOTE WITH CONNECTION `{self.project_id}.{self.region}.{connection_id}`
            OPTIONS (ENDPOINT = '{embedding_model}');''')
def retrieve_matches(self, mode, user_grouping, qe, similarity_threshold, limit):
"""
This function retrieves the most similar table_schema and column_schema.
Modes can be either 'table', 'column', or 'example'
"""
matches = []
if mode == 'table':
sql = '''select base.content as tables_content from vector_search(
(SELECT * FROM `{}.table_details_embeddings` WHERE user_grouping = '{}'), "embedding",
(SELECT {} as qe), top_k=> {},distance_type=>"COSINE") where 1-distance > {} '''
elif mode == 'column':
sql='''select base.content as columns_content from vector_search(
(SELECT * FROM `{}.tablecolumn_details_embeddings` WHERE user_grouping = '{}'), "embedding",
(SELECT {} as qe), top_k=> {}, distance_type=>"COSINE") where 1-distance > {} '''
elif mode == 'example':
sql='''select base.example_user_question, base.example_generated_sql from vector_search (
(SELECT * FROM `{}.example_prompt_sql_embeddings` WHERE user_grouping = '{}'), "embedding",
(select {} as qe), top_k=> {}, distance_type=>"COSINE") where 1-distance > {} '''
else:
ValueError("No valid mode. Must be either table, column, or example")
name_txt = ''
results=self.client.query_and_wait(sql.format('{}.{}'.format(self.project_id,self.opendataqna_dataset),user_grouping,qe,limit,similarity_threshold)).to_dataframe()
# CHECK RESULTS
if len(results) == 0:
print(f"Did not find any results for {mode}. Adjust the query parameters.")
else:
print(f"Found {len(results)} similarity matches for {mode}.")
if mode == 'table':
name_txt = ''
for _ , r in results.iterrows():
name_txt=name_txt+r["tables_content"]+"\n"
elif mode == 'column':
name_txt = ''
for _ ,r in results.iterrows():
name_txt=name_txt+r["columns_content"]+"\n"
elif mode == 'example':
name_txt = ''
for _ , r in results.iterrows():
example_user_question=r["example_user_question"]
example_sql=r["example_generated_sql"]
name_txt = name_txt + "\n Example_question: "+example_user_question+ "; Example_SQL: "+example_sql
else:
ValueError("No valid mode. Must be either table, column, or example")
name_txt = ''
matches.append(name_txt)
return matches
def getSimilarMatches(self, mode, user_grouping, qe, num_matches, similarity_threshold):
if mode == 'table':
match_result= self.retrieve_matches(mode, user_grouping, qe, similarity_threshold, num_matches)
match_result = match_result[0]
# print(match_result)
elif mode == 'column':
match_result= self.retrieve_matches(mode, user_grouping, qe, similarity_threshold, num_matches)
match_result = match_result[0]
elif mode == 'example':
match_result= self.retrieve_matches(mode, user_grouping, qe, similarity_threshold, num_matches)
if len(match_result) == 0:
match_result = None
else:
match_result = match_result[0]
return match_result
def getExactMatches(self, query):
"""Checks if the exact question is already present in the example SQL set"""
check_history_sql=f"""SELECT example_user_question,example_generated_sql FROM `{self.project_id}.{self.opendataqna_dataset}.example_prompt_sql_embeddings`
WHERE lower(example_user_question) = lower("{query}") LIMIT 1; """
exact_sql_history = self.client.query_and_wait(check_history_sql).to_dataframe()
if exact_sql_history[exact_sql_history.columns[0]].count() != 0:
sql_example_txt = ''
exact_sql = ''
for index, row in exact_sql_history.iterrows():
example_user_question=row["example_user_question"]
example_sql=row["example_generated_sql"]
exact_sql=example_sql
sql_example_txt = sql_example_txt + "\n Example_question: "+example_user_question+ "; Example_SQL: "+example_sql
# print("Found a matching question from the history!" + str(sql_example_txt))
final_sql=exact_sql
else:
print("No exact match found for the user prompt")
final_sql = None
return final_sql
def test_sql_plan_execution(self, generated_sql):
    """
    Dry-runs the generated SQL on BigQuery to validate it without incurring cost.

    Args:
        generated_sql (str): The SQL statement to dry-run.

    Returns:
        tuple: (True, summary string with the bytes the query would process)
        on success, or (False, error message string) when the dry run fails.
    """
    try:
        dry_run_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
        dry_run_job = self.client.query(generated_sql, job_config=dry_run_config)
        plan_summary = ("This query will process {} bytes.".format(dry_run_job.total_bytes_processed))
        print(plan_summary)
        return True, plan_summary
    except Exception as e:
        return False, str(e)
def return_table_schema_sql(self, dataset, table_names=None):
    """
    Returns the SQL query to be run on the source DB to get the table schema.

    The query yields one row per table with the columns:
    - project_id / table_schema / table_name
    - table_description: the table's "description" option, or "NA" when unset
    - table_columns: comma-joined aggregate of the table's column names

    Args:
        dataset (str): Dataset to introspect (combined with self.project_id).
        table_names (list[str] | None): Optional subset of tables to include.
    """
    user_dataset = f"{self.project_id}.{dataset}"
    if table_names:
        # Restrict the introspection to the requested tables only.
        quoted_names = ", ".join(f"'{name}'" for name in table_names)
        table_filter_clause = f"AND TABLE_NAME IN ({quoted_names})"
    else:
        table_filter_clause = ""
    return f"""
    (SELECT
    TABLE_CATALOG as project_id, TABLE_SCHEMA as table_schema , TABLE_NAME as table_name, OPTION_VALUE as table_description,
    (SELECT STRING_AGG(column_name, ', ') from `{user_dataset}.INFORMATION_SCHEMA.COLUMNS` where TABLE_NAME= t.TABLE_NAME and TABLE_SCHEMA=t.TABLE_SCHEMA) as table_columns
    FROM
    `{user_dataset}.INFORMATION_SCHEMA.TABLE_OPTIONS` as t
    WHERE
    OPTION_NAME = "description"
    {table_filter_clause}
    ORDER BY
    project_id, table_schema, table_name)
    UNION ALL
    (SELECT
    TABLE_CATALOG as project_id, TABLE_SCHEMA as table_schema , TABLE_NAME as table_name, "NA" as table_description,
    (SELECT STRING_AGG(column_name, ', ') from `{user_dataset}.INFORMATION_SCHEMA.COLUMNS` where TABLE_NAME= t.TABLE_NAME and TABLE_SCHEMA=t.TABLE_SCHEMA) as table_columns
    FROM
    `{user_dataset}.INFORMATION_SCHEMA.TABLES` as t
    WHERE
    NOT EXISTS (SELECT 1 FROM
    `{user_dataset}.INFORMATION_SCHEMA.TABLE_OPTIONS`
    WHERE
    OPTION_NAME = "description" AND TABLE_NAME= t.TABLE_NAME and TABLE_SCHEMA=t.TABLE_SCHEMA)
    {table_filter_clause}
    ORDER BY
    project_id, table_schema, table_name)
    """
def return_column_schema_sql(self, dataset, table_names=None):
    """
    Returns the SQL query to be run on the source DB to get the column schema.

    The query yields one row per column with:
    - project_id / table_schema / table_name / column_name
    - data_type and column_description
    - column_constraints: a note when the column is a primary or foreign key

    Args:
        dataset (str): Dataset to introspect (combined with self.project_id).
        table_names (list[str] | None): Optional subset of tables to include.
    """
    user_dataset = f"{self.project_id}.{dataset}"
    if table_names:
        # Restrict the introspection to the requested tables only.
        quoted_names = ", ".join(f"'{name}'" for name in table_names)
        table_filter_clause = f"AND C.TABLE_NAME IN ({quoted_names})"
    else:
        table_filter_clause = ""
    return f"""
    SELECT
    C.TABLE_CATALOG as project_id, C.TABLE_SCHEMA as table_schema, C.TABLE_NAME as table_name, C.COLUMN_NAME as column_name,
    C.DATA_TYPE as data_type, C.DESCRIPTION as column_description, CASE WHEN T.CONSTRAINT_TYPE="PRIMARY KEY" THEN "This Column is a Primary Key for this table" WHEN
    T.CONSTRAINT_TYPE = "FOREIGN_KEY" THEN "This column is Foreign Key" ELSE NULL END as column_constraints
    FROM
    `{user_dataset}.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS` C
    LEFT JOIN
    `{user_dataset}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS` T
    ON C.TABLE_CATALOG = T.TABLE_CATALOG AND
    C.TABLE_SCHEMA = T.TABLE_SCHEMA AND
    C.TABLE_NAME = T.TABLE_NAME AND
    T.ENFORCED ='YES'
    LEFT JOIN
    `{user_dataset}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE` K
    ON K.CONSTRAINT_NAME=T.CONSTRAINT_NAME AND C.COLUMN_NAME = K.COLUMN_NAME
    WHERE
    1=1
    {table_filter_clause}
    ORDER BY
    project_id, table_schema, table_name, column_name;
    """
def get_column_samples(self, columns_df):
    """
    Enriches a column-schema DataFrame with sample values for each column.

    For every row of columns_df, runs APPROX_TOP_COUNT over that column to
    fetch its five most frequent values and stores them, comma-joined, in a
    new 'sample_values' column.

    Args:
        columns_df (pd.DataFrame): Must contain the columns 'project_id',
            'table_schema', 'table_name' and 'column_name'.

    Returns:
        pd.DataFrame: The same DataFrame with 'sample_values' added.
    """
    collected_samples = []
    for _, col_row in columns_df.iterrows():
        sample_sql = f'''SELECT STRING_AGG(CAST(value AS STRING)) as sample_values FROM UNNEST((SELECT APPROX_TOP_COUNT({col_row["column_name"]},5) as osn
        FROM `{col_row["project_id"]}.{col_row["table_schema"]}.{col_row["table_name"]}`
        ))'''
        sample_df = self.retrieve_df(sample_sql)
        collected_samples.append(sample_df['sample_values'].to_string(index=False))
    columns_df["sample_values"] = collected_samples
    return columns_df
================================================
FILE: dbconnectors/FirestoreConnector.py
================================================
from google.cloud import firestore
from google.cloud.exceptions import NotFound
import time
from dbconnectors import DBConnector
from abc import ABC
import uuid
def create_unique_id():
    """Creates a unique ID using the UUID4 algorithm.

    Returns:
        A string representing a unique ID.
    """
    # Fix: the docstring promises UUID4, but the code used uuid1(), which
    # embeds the host MAC address and timestamp. uuid4() is fully random.
    return str(uuid.uuid4())
class FirestoreConnector(DBConnector, ABC):
    """Connector that logs and retrieves chat sessions in Firestore."""

    def __init__(self,
                 project_id: str,
                 firestore_database: str):
        """Initializes the Firestore connection and authentication."""
        self.db = firestore.Client(project=project_id, database=firestore_database)

    def log_chat(self, session_id, user_question, bot_response, user_id="TEST"):
        """Logs a chat message to Firestore.

        Args:
            session_id (str): The ID of the chat session.
            user_question (str): The question the user asked.
            bot_response (str): The response from the bot.
            user_id (str): The ID of the user who sent the message.
        """
        # Each call creates a new auto-ID document in session_logs.
        self.db.collection("session_logs").document().set({
            "session_id": session_id,
            "user_id": user_id,
            "user_question": user_question,
            "bot_response": bot_response,
            "timestamp": firestore.SERVER_TIMESTAMP,
        })

    def get_chat_logs_for_session(self, session_id):
        """Gets all chat logs for a given session, ordered by timestamp.

        Args:
            session_id (str): The ID of the chat session.

        Returns:
            list[dict]: Entries with 'user_question', 'bot_response' and
            'timestamp', sorted by timestamp ascending.
        """
        logs_ref = self.db.collection("session_logs")
        session_query = logs_ref.where(filter=firestore.FieldFilter("session_id", "==", session_id))
        # Note: use of CollectionRef stream() is preferred to get().
        # Sorting is done client-side instead of via order_by (see the
        # commented-out variants in history) — presumably to avoid needing a
        # composite index; confirm before changing.
        entries = [doc.to_dict() for doc in session_query.stream()]
        entries.sort(key=lambda entry: entry["timestamp"])
        return [
            {
                'user_question': entry['user_question'],
                'bot_response': entry['bot_response'],
                'timestamp': entry['timestamp'],
            }
            for entry in entries
        ]
================================================
FILE: dbconnectors/PgConnector.py
================================================
"""
PostgreSQL Connector Class
"""
import asyncpg
from google.cloud.sql.connector import Connector
from sqlalchemy import create_engine
import pandas as pd
from sqlalchemy.sql import text
from pgvector.asyncpg import register_vector
import asyncio
from pg8000.exceptions import DatabaseError
from utilities import root_dir
from google.cloud.sql.connector import Connector
from dbconnectors import DBConnector
from abc import ABC
def pg_specific_data_types():
    """Returns a prompt-ready text catalogue of PostgreSQL data types.

    The returned string is injected verbatim into LLM prompts so the
    SQL-generation agents know which PostgreSQL data types exist; do not
    reformat it without considering prompt impact.
    """
    return '''
PostgreSQL offers a wide variety of datatypes to store different types of data effectively. Here's a breakdown of the available categories:
Numeric datatypes -
SMALLINT: Stores small-range integers between -32768 and 32767.
INTEGER: Stores typical integers between -2147483648 and 2147483647.
BIGINT: Stores large-range integers between -9223372036854775808 and 9223372036854775807.
DECIMAL(p,s): Stores arbitrary precision numbers with a maximum of p digits and s digits to the right of the decimal point.
NUMERIC: Similar to DECIMAL but with additional features like automatic scaling.
REAL: Stores single-precision floating-point numbers with an approximate range of -3.4E+38 to 3.4E+38.
DOUBLE PRECISION: Stores double-precision floating-point numbers with an approximate range of -1.7E+308 to 1.7E+308.
Character datatypes -
CHAR(n): Fixed-length character string with a specified length of n characters.
VARCHAR(n): Variable-length character string with a maximum length of n characters.
TEXT: Variable-length string with no maximum size limit.
CHARACTER VARYING(n): Alias for VARCHAR(n).
CHARACTER: Alias for CHAR.
Monetary datatypes -
MONEY: Stores monetary amounts with two decimal places.
Date/Time datatypes -
DATE: Stores dates without time information.
TIME: Stores time of day without date information (optionally with time zone).
TIMESTAMP: Stores both date and time information (optionally with time zone).
INTERVAL: Stores time intervals between two points in time.
Binary types -
BYTEA: Stores variable-length binary data.
BIT: Stores single bits.
BIT VARYING: Stores variable-length bit strings.
Other types -
BOOLEAN: Stores true or false values.
UUID: Stores universally unique identifiers.
XML: Stores XML data.
JSON: Stores JSON data.
ENUM: Stores user-defined enumerated values.
RANGE: Stores ranges of data values.
This list covers the most common datatypes in PostgreSQL.
'''
class PgConnector(DBConnector, ABC):
    """
    A connector class for interacting with PostgreSQL databases.

    Provides connection management (Cloud SQL connector + SQLAlchemy pool),
    query execution into pandas DataFrames, caching of known-good SQL,
    pgvector similarity retrieval, SQL plan testing, and builders for
    schema-introspection SQL.

    Attributes:
        project_id (str): The Google Cloud project ID where the PostgreSQL instance resides.
        region (str): The region where the PostgreSQL instance is located.
        instance_name (str): The name of the PostgreSQL instance.
        database_name (str): The name of the database to connect to.
        database_user (str): The username for authentication.
        database_password (str): The password for authentication.
        pool (Engine): A SQLAlchemy engine object for managing database connections.
    """

    def __init__(self,
                 project_id: str,
                 region: str,
                 instance_name: str,
                 database_name: str,
                 database_user: str,
                 database_password: str):
        self.project_id = project_id
        self.region = region
        self.instance_name = instance_name
        self.database_name = database_name
        self.database_user = database_user
        self.database_password = database_password
        # Connections are created lazily through getconn().
        self.pool = create_engine(
            "postgresql+pg8000://",
            creator=self.getconn,
        )

    def getconn(self):
        """Returns a new pg8000 connection to the Cloud SQL instance."""
        connector = Connector()
        conn = connector.connect(
            f"{self.project_id}:{self.region}:{self.instance_name}",
            "pg8000",
            user=f"{self.database_user}",
            password=f"{self.database_password}",
            db=f"{self.database_name}"
        )
        return conn

    def retrieve_df(self, query):
        """Executes a SQL query and returns the result as a pandas DataFrame.

        On database errors the exception is printed and a one-row DataFrame
        with the column 'Error. Message' is returned instead; callers such as
        test_sql_plan_execution rely on this sentinel shape.
        """
        try:
            with self.pool.connect() as db_conn:
                result_df = pd.read_sql(text(query), con=db_conn)
            return result_df
        except Exception as e:
            print(f"Database Error: {e}")
            return pd.DataFrame({'Error. Message': e}, index=[0])

    async def cache_known_sql(self):
        """Rebuilds the query_example_embeddings table from known_good_sql.csv.

        Drops and recreates the table, then bulk-copies the CSV rows
        (prompt, sql, database_name) into it.
        """
        # Fix: the original interpolated an undefined name `scripts`
        # (NameError at runtime); the CSV lives under <root>/scripts/.
        df = pd.read_csv(f"{root_dir}/scripts/known_good_sql.csv")
        df = df.loc[:, ["prompt", "sql", "database_name"]]
        df = df.dropna()
        loop = asyncio.get_running_loop()
        async with Connector(loop=loop) as connector:
            # Create connection to Cloud SQL database.
            conn: asyncpg.Connection = await connector.connect_async(
                f"{self.project_id}:{self.region}:{self.instance_name}",
                "asyncpg",
                user=f"{self.database_user}",
                password=f"{self.database_password}",
                db=f"{self.database_name}",
            )
            await register_vector(conn)
            # Delete the table if it exists.
            await conn.execute("DROP TABLE IF EXISTS query_example_embeddings CASCADE")
            # Create the `query_example_embeddings` table.
            await conn.execute(
                """CREATE TABLE query_example_embeddings(
                prompt TEXT,
                sql TEXT,
                user_grouping TEXT)"""
            )
            # Copy the dataframe to the 'query_example_embeddings' table.
            tuples = list(df.itertuples(index=False))
            await conn.copy_records_to_table(
                "query_example_embeddings", records=tuples, columns=list(df), timeout=10000
            )
            await conn.close()

    async def retrieve_matches(self, mode, user_grouping, qe, similarity_threshold, limit):
        """Retrieves the most similar table/column/example embeddings.

        Args:
            mode (str): 'table', 'column', or 'example'.
            user_grouping (str): Grouping used to scope the similarity search.
            qe: Query embedding vector.
            similarity_threshold (float): Minimum cosine similarity.
            limit (int): Maximum number of rows to fetch.

        Returns:
            list: A single-element list containing the formatted match text
            (empty string when nothing matched).

        Raises:
            ValueError: If mode is not a supported value.
        """
        matches = []
        loop = asyncio.get_running_loop()
        async with Connector(loop=loop) as connector:
            # Create connection to Cloud SQL database.
            conn: asyncpg.Connection = await connector.connect_async(
                f"{self.project_id}:{self.region}:{self.instance_name}",
                "asyncpg",
                user=f"{self.database_user}",
                password=f"{self.database_password}",
                db=f"{self.database_name}",
            )
            await register_vector(conn)
            # Prepare the SQL depending on 'mode'
            if mode == 'table':
                sql = """
                SELECT content as tables_content,
                1 - (embedding <=> $1) AS similarity
                FROM table_details_embeddings
                WHERE 1 - (embedding <=> $1) > $2
                AND user_grouping = $4
                ORDER BY similarity DESC LIMIT $3
                """
            elif mode == 'column':
                sql = """
                SELECT content as columns_content,
                1 - (embedding <=> $1) AS similarity
                FROM tablecolumn_details_embeddings
                WHERE 1 - (embedding <=> $1) > $2
                AND user_grouping = $4
                ORDER BY similarity DESC LIMIT $3
                """
            elif mode == 'example':
                sql = """
                SELECT user_grouping, example_user_question, example_generated_sql,
                1 - (embedding <=> $1) AS similarity
                FROM example_prompt_sql_embeddings
                WHERE 1 - (embedding <=> $1) > $2
                AND user_grouping = $4
                ORDER BY similarity DESC LIMIT $3
                """
            else:
                # Fix: the original constructed (but never raised) a ValueError
                # and then crashed with a NameError on the unbound `sql`.
                raise ValueError("No valid mode. Must be either table, column, or example")

            # FETCH RESULTS FROM POSTGRES DB
            results = await conn.fetch(
                sql,
                qe,
                similarity_threshold,
                limit,
                user_grouping
            )
            # CHECK RESULTS
            if len(results) == 0:
                print(f"Did not find any results for {mode}. Adjust the query parameters.")
            else:
                print(f"Found {len(results)} similarity matches for {mode}.")
            name_txt = ''
            if mode == 'table':
                for r in results:
                    name_txt = name_txt + r["tables_content"] + "\n\n"
            elif mode == 'column':
                for r in results:
                    name_txt = name_txt + r["columns_content"] + "\n\n "
            elif mode == 'example':
                for r in results:
                    example_user_question = r["example_user_question"]
                    example_sql = r["example_generated_sql"]
                    name_txt = name_txt + "\n Example_question: " + example_user_question + "; Example_SQL: " + example_sql
            matches.append(name_txt)
            # Close the connection to the database.
            await conn.close()
        return matches

    async def getSimilarMatches(self, mode, user_grouping, qe, num_matches, similarity_threshold):
        """Formats similarity matches for the given mode (see retrieve_matches).

        Returns:
            str or None: The formatted match text; None when mode is 'example'
            and nothing was retrieved.

        Raises:
            ValueError: If mode is not 'table', 'column', or 'example'.
        """
        if mode not in ('table', 'column', 'example'):
            # Fix: the original fell through and raised UnboundLocalError.
            raise ValueError("No valid mode. Must be either table, column, or example")
        match_result = await self.retrieve_matches(mode, user_grouping, qe, similarity_threshold, num_matches)
        if mode == 'example' and len(match_result) == 0:
            return None
        return match_result[0]

    def test_sql_plan_execution(self, generated_sql):
        """Validates generated SQL by running EXPLAIN ANALYZE against the DB.

        NOTE(review): EXPLAIN ANALYZE actually executes the statement —
        confirm this is acceptable for the workloads being validated.

        Returns:
            tuple: (bool indicating success, DataFrame with the plan or an
            error string).
        """
        try:
            exec_result_df = pd.DataFrame()
            sql = f"""EXPLAIN ANALYZE {generated_sql}"""
            exec_result_df = self.retrieve_df(sql)
            # retrieve_df signals failure via an 'Error. Message' column.
            if not exec_result_df.empty:
                if str(exec_result_df.iloc[0]).startswith('Error. Message'):
                    correct_sql = False
                else:
                    print('\n No need to rewrite the query. This seems to work fine and returned rows...')
                    correct_sql = True
            else:
                print('\n No need to rewrite the query. This seems to work fine but no rows returned...')
                correct_sql = True
            return correct_sql, exec_result_df
        except Exception as e:
            return False, str(e)

    def getExactMatches(self, query):
        """
        Checks if the exact question is already present in the example SQL set.

        Returns:
            str or None: The cached SQL string, or None when no exact
            (case-insensitive) match exists.
        """
        # Fix: escape single quotes so questions containing apostrophes
        # (e.g. "what's the total?") do not break the SQL literal.
        safe_question = query.replace("'", "''")
        check_history_sql = f"""SELECT example_user_question,example_generated_sql
        FROM example_prompt_sql_embeddings
        WHERE lower(example_user_question) = lower('{safe_question}') LIMIT 1; """
        exact_sql_history = self.retrieve_df(check_history_sql)
        if exact_sql_history[exact_sql_history.columns[0]].count() != 0:
            final_sql = None
            for _, row in exact_sql_history.iterrows():
                # LIMIT 1 yields at most one row; keep the last SQL seen.
                final_sql = row["example_generated_sql"]
        else:
            print("No exact match found for the user prompt")
            final_sql = None
        return final_sql

    def return_column_schema_sql(self, schema, table_names=None):
        """
        Returns the SQL to retrieve the column schema for a PostgreSQL schema.

        The query yields table_schema, table_name, column_name, data_type,
        column_description, table_description, primary_key and
        column_constraints (PK/FK notes) for every column.

        Args:
            schema (str): Schema to introspect, e.g. 'retail'.
            table_names (list[str] | None): Optional subset of tables.
        """
        table_filter_clause = ""
        if table_names:
            formatted_table_names = [f"'{name}'" for name in table_names]
            table_filter_clause = f"""and table_name in ({', '.join(formatted_table_names)})"""
        column_schema_sql = f'''
        WITH
        columns_schema
        AS
        (select c.table_schema,c.table_name,c.column_name,c.data_type,d.description as column_description, obj_description(c1.oid) as table_description
        from information_schema.columns c
        inner join pg_class c1
        on c.table_name=c1.relname
        inner join pg_catalog.pg_namespace n
        on c.table_schema=n.nspname
        and c1.relnamespace=n.oid
        left join pg_catalog.pg_description d
        on d.objsubid=c.ordinal_position
        and d.objoid=c1.oid
        where
        c.table_schema='{schema}' {table_filter_clause}) ,
        pk_schema as
        (SELECT table_name, column_name AS primary_key
        FROM information_schema.key_column_usage
        WHERE TABLE_SCHEMA='{schema}' {table_filter_clause}
        AND CONSTRAINT_NAME like '%_pkey%'
        ORDER BY table_name, primary_key),
        fk_schema as
        (SELECT table_name, column_name AS foreign_key
        FROM information_schema.key_column_usage
        WHERE TABLE_SCHEMA='{schema}' {table_filter_clause}
        AND CONSTRAINT_NAME like '%_fkey%'
        ORDER BY table_name, foreign_key)
        select lr.*,
        case
        when primary_key is not null then 'Primary key for this table'
        when foreign_key is not null then CONCAT('Foreign key',column_description)
        else null
        END as column_constraints
        from
        (select l.*,r.primary_key
        from
        columns_schema l
        left outer join
        pk_schema r
        on
        l.table_name=r.table_name
        and
        l.column_name=r.primary_key) lr
        left outer join
        fk_schema rt
        on
        lr.table_name=rt.table_name
        and
        lr.column_name=rt.foreign_key
        ;
        '''
        return column_schema_sql

    def return_table_schema_sql(self, schema, table_names=None):
        """
        Returns the SQL to retrieve the table schema for a PostgreSQL schema.

        The query yields table_schema, table_name, table_description and
        table_columns (comma-joined column names) for every table.

        Args:
            schema (str): Schema to introspect, e.g. 'retail'.
            table_names (list[str] | None): Optional subset of tables.
        """
        table_filter_clause = ""
        if table_names:
            formatted_table_names = [f"'{name}'" for name in table_names]
            table_filter_clause = f"""and table_name in ({', '.join(formatted_table_names)})"""
        table_schema_sql = f'''
        SELECT table_schema, table_name,table_description, array_to_string(array_agg(column_name), ' , ') as table_columns
        FROM
        (select c.table_schema,c.table_name,c.column_name,c.ordinal_position,c.column_default,c.data_type,d.description, obj_description(c1.oid) as table_description
        from information_schema.columns c
        inner join pg_class c1
        on c.table_name=c1.relname
        inner join pg_catalog.pg_namespace n
        on c.table_schema=n.nspname
        and c1.relnamespace=n.oid
        left join pg_catalog.pg_description d
        on d.objsubid=c.ordinal_position
        and d.objoid=c1.oid
        where
        c.table_schema='{schema}' {table_filter_clause} ) data
        GROUP BY table_schema, table_name, table_description
        ORDER BY table_name;
        '''
        return table_schema_sql

    def get_column_samples(self, columns_df):
        """
        Enriches a column-schema DataFrame with sample values for each column.

        Samples come from pg_stats.most_common_vals; braces are stripped from
        the array literal before storing.

        Args:
            columns_df (pd.DataFrame): Must contain 'table_schema',
                'table_name' and 'column_name' columns.

        Returns:
            pd.DataFrame: The same DataFrame with a 'sample_values' column added.
        """
        sample_column_list = []
        for index, row in columns_df.iterrows():
            get_column_sample_sql = f'''SELECT most_common_vals AS sample_values FROM pg_stats WHERE tablename = '{row["table_name"]}' AND schemaname = '{row["table_schema"]}' AND attname = '{row["column_name"]}' '''
            column_samples_df = self.retrieve_df(get_column_sample_sql)
            sample_column_list.append(column_samples_df['sample_values'].to_string(index=False).replace("{", "").replace("}", ""))
        columns_df["sample_values"] = sample_column_list
        return columns_df
================================================
FILE: dbconnectors/__init__.py
================================================
from .core import DBConnector
from .PgConnector import PgConnector, pg_specific_data_types
from .BQConnector import BQConnector, bq_specific_data_types
from .FirestoreConnector import FirestoreConnector
from utilities import (PROJECT_ID,
PG_INSTANCE, PG_DATABASE, PG_USER, PG_PASSWORD, PG_REGION,BQ_REGION,
BQ_OPENDATAQNA_DATASET_NAME,BQ_LOG_TABLE_NAME)
# Module-level singleton connectors, constructed from the utilities config at
# import time. NOTE(review): importing this package therefore requires valid
# configuration for all three backends (PostgreSQL, BigQuery, Firestore) —
# confirm that eager construction is intended.
pgconnector = PgConnector(PROJECT_ID, PG_REGION, PG_INSTANCE, PG_DATABASE, PG_USER, PG_PASSWORD)
bqconnector = BQConnector(PROJECT_ID,BQ_REGION,BQ_OPENDATAQNA_DATASET_NAME,BQ_LOG_TABLE_NAME)
# The Firestore database name is hard-coded here rather than read from config.
firestoreconnector = FirestoreConnector(PROJECT_ID,"opendataqna-session-logs")
# Public wildcard-import surface. NOTE(review): the connector classes and
# bq_specific_data_types are imported above but intentionally not re-exported.
__all__ = ["pgconnector", "pg_specific_data_types", "bqconnector","firestoreconnector"]
================================================
FILE: dbconnectors/core.py
================================================
"""
Provides the base class for all Connectors
"""
from abc import ABC
class DBConnector(ABC):
    """
    The core class for all Connectors.

    Stores the connection settings shared by database connectors.
    NOTE(review): the subclasses visible in this package (PgConnector,
    BQConnector, FirestoreConnector) define their own __init__ and do not
    call super().__init__, so this constructor may be effectively unused —
    confirm before extending it.
    """
    # Identifies the concrete connector implementation; subclasses may override.
    connectorType: str = "Base"
    def __init__(self,
    project_id:str,
    region:str,
    instance_name:str,
    database_name:str,
    database_user:str,
    database_password:str,
    dataset_name:str):
        """
        Args:
            project_id (str): GCP project id hosting the database.
            region (str): Region of the database instance.
            instance_name (str): Name of the database instance.
            database_name (str): Database to connect to.
            database_user (str): Username for authentication.
            database_password (str): Password for authentication.
            dataset_name (str): Dataset/schema name within the database.
        """
        self.project_id = project_id
        self.region = region
        self.instance_name = instance_name
        self.database_name = database_name
        self.database_user = database_user
        self.database_password = database_password
        self.dataset_name = dataset_name
================================================
FILE: docs/README.md
================================================
This directory contains documentation and resources to help you understand and use the Open Data QnA library effectively.
## Contents
* **README.md:** This file. Provides an overview of the documentation in this directory.
* **best_practices.md:** Best practices and guidelines for using the library, including recommended configurations, tips for improving performance, and common pitfalls to avoid.
* **faq.md:** Frequently asked questions about the library, covering common issues, troubleshooting tips, and general usage guidance.
* **repo_structure.md:** A detailed explanation of the library's repository structure, including the purpose of each file and directory, and how to navigate the codebase.
## How to Use This Documentation
**Start with the README.md on the root dir:** This file provides a high-level overview and guides you to the relevant resources.
**Consult the FAQ:** If you have any questions or encounter issues, check the FAQ section for possible solutions and answers.
**Explore Best Practices:** For optimizing your usage and getting the most out of the library, review the best practices document.
**Understand the Codebase:** If you want to dive deeper into the library's code, refer to the repository structure document for a detailed explanation of how the code is organized.
================================================
FILE: docs/architecture.md
================================================
Architecture
-------------
<p align="center">
<a href="/utilities/imgs/OpenDataQnA_architecture.png">
<img src="/utilities/imgs/OpenDataQnA_architecture.png" alt="aaie image">
</a>
</p>
Architecture Summary
-------------
Open Data QnA operates in a sequence of well-defined steps, orchestrating various agents to process user queries and generate informative responses:
* **Vector Store Creation:** The vector store is initialized, storing embeddings of known good SQL queries, table schemas, and column details. This serves as a knowledge base for retrieval-augmented generation (RAG).
* **RAG (Retrieval-Augmented Generation):** User queries are embedded and compared to the vector store to retrieve relevant context (table/column details and similar past queries) for improved query generation.
* **SQL Generation (BuildSQLAgent):** The BuildSQLAgent leverages the retrieved context and the user's natural language question to generate an initial SQL query.
* **Optional Validation (ValidateSQLAgent):** If enabled, the ValidateSQLAgent assesses the generated SQL for syntactic and semantic correctness.
* **Optional Debugging (DebugSQLAgent):** If the initial SQL is invalid and debugging is enabled, the DebugSQLAgent iteratively refines the query based on error feedback.
* **SQL Execution (Dry Run/Explain):** The refined SQL query is tested with a dry run (BigQuery) or explain plan (PostgreSQL) to estimate resource usage and identify potential errors.
* **SQL Execution (Full Run):** If the query is deemed valid, it's executed against the database to fetch the results.
* **Response Generation (ResponseAgent):** The ResponseAgent analyzes the SQL results and the user's question to generate a natural language response, providing a clear and concise answer.
* **Optional Visualization (VisualizeAgent):** If enabled, the VisualizeAgent suggests suitable chart types and generates JavaScript code for Google Charts to display the SQL results in a visually appealing manner.
**Key Points:**
* **Modularity:** Each step is handled by a specialized agent, allowing for flexibility and customization.
* **RAG Enhancement:** The use of retrieval-augmented generation leverages existing knowledge for better query formulation.
* **Validation and Debugging:** Optional agents enhance the reliability and accuracy of generated queries.
* **Informative Responses:** The ResponseAgent aims to provide meaningful and contextually relevant answers.
* **Visual Appeal:** The optional visualization adds an interactive layer to the user experience.
================================================
FILE: docs/best_practices.md
================================================
# Open Data QnA: Best Practices
## General Usage
### Select the Right Database Connector:
Choose between `PgConnector` (Google Cloud SQL PostgreSQL) and `BQConnector` (BigQuery) to match your specific database.
### Prepare your data:
Ensure your database tables are structured logically with appropriate column names and data types. We further recommend adding concise descriptions to tables and columns to provide the LLM agents with the necessary context.
Additionally, please ensure that the overall data quality of your database is good - if you have pattern mismatches or missing values, these will impact the performance of the Open Data QnA solution.
### Start simple:
Begin with straightforward questions and fewer tables and progressively experiment with more complex queries and adding more tables.
### Leverage the ‘Known Good SQL’ Cache
The `Known Good SQL` cache can (and should) be populated with example user question <-> SQL query pairs relating to your use case. This benefits the solution in two ways:
Caching layer reduces latency: if a known user question is found in the cache that exactly matches (meaning, each char is matching, down to punctuation) the new input question, the known good SQL query is fetched and SQL generation will be skipped.
In Context Learning: if a known user question is found to be similar to one of the existing queries in the cache, the similar user question is retrieved along with the corresponding SQL query and used as a few-shot example in the prompt for the SQL Generation agent. The user can specify how many example values should be retrieved to use as few-shot examples. We recommend using 3-5 examples, but this further depends on the variations of user questions you expect in your use case.
### Explore Visualizations
Utilize the `VisualizeAgent` to generate charts and graphs for a more intuitive understanding of your data. However, make sure to only run the agent on queries that the pipeline has flagged as ‘valid’.
## Customization & Optimization
### Agent Modification
The `core` Agent class (agents/core.py) specifies the models supported for the different agents in the Open Data QnA solution.
In version 1, these are:
- Code Bison ('code-bison-32k')
- Text Bison ('text-bison-32k')
- Codechat Bison ('codechat-bison-32k')
- Gemini 1.0 pro ('gemini-1.0-pro')
You can set the different models for each agent when calling the pipeline_run function (see below under `Pipeline Run Configurations`).
### Prompt Engineering
Each of the defined agents has their own prompt specified in its agent class file.
BuildSQLAgent.py: prompts for BigQuery and PostgreSQL SQL Generation.
DebugSQLAgent.py: prompts for debugging for either BQ or PG queries.
DescriptionAgent.py: prompts for generating missing table and column descriptions.
ResponseAgent.py: prompt to generate a natural language response, answering the user question by using the output of the generated SQL query.
ValidateSQLAgent.py: prompt to classify a given SQL as valid or invalid.
VisualizeAgent.py: two prompts; one for proposing a fitting graph / plot for a given question <-> SQL pair; the other for generating the visualization.
### Pipeline Run Configurations
Additionally to changing the base models and the prompts, it is advisable to experiment with different configuration settings of the pipeline run function:
```
async def run_pipeline(user_question,
RUN_DEBUGGER=True,
EXECUTE_FINAL_SQL=True,
DEBUGGING_ROUNDS = 2,
LLM_VALIDATION=True,
SQLBuilder_model= 'gemini-1.0-pro',
SQLChecker_model= 'gemini-1.0-pro',
SQLDebugger_model= 'gemini-1.0-pro',
Responder_model= 'gemini-1.0-pro',
num_table_matches = 5,
num_column_matches = 10,
table_similarity_threshold = 0.3,
column_similarity_threshold = 0.3,
example_similarity_threshold = 0.3,
num_sql_matches=3)
```
Args:
* **user_question (str):** The natural language question to answer.
* **RUN_DEBUGGER (bool, optional):** Whether to run the SQL debugger. Defaults to True.
It is recommended to use the debugger for improved SQL Generation accuracy.
* **DEBUGGING_ROUNDS (int, optional):** The number of debugging rounds. Defaults to 2.
We suggest using a value between 2-5, depending on your accuracy and latency requirements.
* **EXECUTE_FINAL_SQL (bool, optional):** Whether to execute the final SQL query. Defaults to True.
You can disable the SQL execution. This will leave you with the generated SQL query as a response, skipping the retrieval of the execution result and the response generation.
* **LLM_VALIDATION (bool, optional):** Whether to use LLM for SQL validation during debugging. Defaults to True.
You can disable the SQL Validator if you have specific latency requirements. When disabled, the Debugger will execute a dry run to retrieve any errors from the database call and debug accordingly.
* **SQLBuilder_model (str, optional):** The name of the SQL building model. Defaults to 'gemini-1.0-pro'.
* **SQLChecker_model (str, optional):** The name of the SQL validation model. Defaults to 'gemini-1.0-pro'.
* **SQLDebugger_model (str, optional):** The name of the SQL debugging model. Defaults to 'gemini-1.0-pro'.
* **Responder_model (str, optional):** The name of the response generation model. Defaults to 'gemini-1.0-pro'.
* **num_table_matches (int, optional):** The number of similar tables to retrieve. Defaults to 5.
These will be used when calling the SQL Generation Agent.
We recommend setting this higher if you have high variations in your database and user queries.
* **num_column_matches (int, optional):** The number of similar columns to retrieve. Defaults to 10.
These will be used when calling the SQL Generation Agent.
We recommend setting this higher if you have high variations in your database and user queries.
* **table_similarity_threshold (float, optional):** The similarity threshold for tables. Defaults to 0.3.
Start with higher values and gradually decrease them if you’re not getting enough relevant results.
* **column_similarity_threshold (float, optional):** The similarity threshold for columns. Defaults to 0.3.
Start with higher values and gradually decrease them if you’re not getting enough relevant results.
* **example_similarity_threshold (float, optional):** The similarity threshold for example questions. Defaults to 0.3.
Start with higher values and gradually decrease them if you’re not getting enough relevant results.
* **num_sql_matches (int, optional):** The number of similar SQL queries to retrieve. Defaults to 3.
================================================
FILE: docs/changelog.md
================================================
# Release Notes - Open Data QnA v2.0.0
This major release brings significant improvements and new features to Open Data QnA.
## Multi turn capabilities
Ability to interact back and forth with the database in a context. Initial v1 was established with a single turn query. In this release, we have created a multi turn architecture that saves the session info, previous query information and can answer accordingly. For more information on the architecture, see [architecture.md](architecture.md).
## Table Grouping
Initial v1 was tied to single dataset processing and all the tables under this dataset. In reality, users most likely want to restrict the tables and add other datasets if needed. This table grouping provides a way for users to be able to define their scope
<p align="center">
<a href="images/table_grouping.png">
<img src="images/table_grouping.png" alt="aaie image">
</a>
</p>
## Data Sampling
We provide a sampling of data values in a column to provide contextual information to the SQL Generation agent. For this, top 5 values are retrieved for every column in the specified tables.
This information is aggregated and stored back into the vector store, and is retrieved during the retrieval process.
<p align="center">
<a href="images/data_sampling1.png">
<img src="images/data_sampling1.png" alt="aaie image">
</a>
</p>
<p align="center">
<a href="images/data_sampling2.png">
<img src="images/data_sampling2.png" alt="aaie image">
</a>
</p>
## Data summarization
In the initial V1 release, the results were in tabular format. With this release, we provide summarized answers in a natural language format that can be integrated into a chatbot. Users still have the option to get tabular and visualized results based on their settings.
<p align="center">
<a href="images/data_summarization.png">
<img src="images/data_summarization.png" alt="aaie image">
</a>
</p>
## Resolving ambiguities
The multi-turn approach helps to resolve ambiguities in the questions, by allowing the user to provide follow-up questions and clarifications.
Furthermore, it is possible to provide additional context in the instruction prompt to let the LLM resolve ambiguities before triggering the pipeline. This can be achieved with the help of a LLM router added as a first layer before the Open Data QnA pipeline.
These clarification questions can help provide more context to the SQL creation.
Ambiguities can be categorized into semantic, application, business and database context. With this release we look for semantic and business level context and resolve such ambiguities through the chat interface.
## UX through Flutter and Streamlit
In addition to the AngularJS, we have added support through Flutter as part of the release which can be found under the front end code folder.
Furthermore, to enable more efficient development, we have added support for streamlit, so users can quickly iterate and test in a dev frontend before deploying to Angular or Flutter.
# Release Notes - Open Data QnA v1.2.0
This release brings significant improvements and new features to enhance the stability, functionality, and user experience of the Open Data QnA.
## 🗝️ Key Enhancements:
* **Enhanced Functionality:** Added the ability to specify a list of table names to be processed in BQ, instead of parsing all tables in a dataset.
* **Improved Debugging:** The SQL debugger now incorporates the user's question into its prompts, leading to more accurate and relevant debugging suggestions.
* **Simplified Setup:** Streamlined notebook setup and environment variable management for a smoother user experience.
* **Quickstart**: Added a standalone notebook for quick experimentation with the overall approach, limited to BQ.
* **Flexible Configuration:** Introduced optional arguments for the CLI pipeline, allowing users to customize various parameters like table and column similarity thresholds.
* **Code Refinements:** Removed hardcoded embedding models and added a save_config function for cleaner configuration management.
* **Bug Fixes:** Resolved various bugs, including issues with root directory checking, utility initialization, source type determination, and safety settings.
* **Expanded Documentation:** Added comprehensive docstrings to functions for better clarity and understanding.
## 📈 Additional Improvements:
* **Code Cleanup:** Removed unnecessary files and redundant code, improving overall code maintainability.
* **Updated README:** Improved the README file with clearer instructions and updated information.
* **Enhanced User Interface:** Introduced a CLI approach (experimental) for more streamlined interaction.
## 🐜 Bug Fixes:
* Fixed bugs in standalone notebook functionality.
* Removed telemetry test code.
* Corrected embedding distances in BigQuery.
* Resolved various typos and inconsistencies in the codebase.
This release marks a significant step forward in the development of the Open Data QnA SQL Generation tool, making it more reliable, flexible, and user-friendly. We encourage you to upgrade and explore the new features!
================================================
FILE: docs/config_guide.md
================================================
## Follow the below guide to populate your config.ini file:
______________
**[CONFIG]**
**embedding_model = vertex** *;Options: 'vertex' or 'vertex-lang'*
**description_model = gemini-1.0-pro** *;Options 'gemini-1.0-pro', 'gemini-1.5-pro', 'text-bison-32k', 'gemini-1.5-flash'*
**vector_store = cloudsql-pgvector** *;Options: 'bigquery-vector', 'cloudsql-pgvector'*
**debugging = yes** *;if debugging is enabled. yes or no*
**logging = yes** *;if logging is enabled. yes or no*
**kgq_examples = yes** *;if known-good-queries are provided. yes or no.*
**use_session_history = yes** *;if you want to use current session's questions without re-evaluating them*
**use_column_samples = yes** *;if you want the solution to collect some sample values from the data source columns to improve understanding of values. yes or no*
**[GCP]**
**project_id = my_project** *;your GCP project*
*; fill out the values below if you want to use PG as your vector database:*
**[PGCLOUDSQL]**
**pg_region = us-central1**
**pg_instance = pg15-opendataqna**
**pg_database = opendataqna-db**
**pg_user = pguser**
**pg_password = pg123**
*; fill out the values below if you want to use BQ as your vector database:*
**[BIGQUERY]**
*; the remaining values are the settings for the BQ vector store / log dataset and table created by the solution:*
**bq_dataset_region = us-central1**
**bq_opendataqna_dataset_name = opendataqna**
**bq_log_table_name = audit_log_table**
**firestore_region = us-central** *;region for NoSQL DB firestore region to deploy*
________________
================================================
FILE: docs/faq.md
================================================
# Open Data QnA: FAQ
## Source and Vector Store Setup
**Q: If new to the vector store concept, which vector store would you recommend?**
A: Both the vector stores (pgvector and bigquery vector) are created using embedding model as you specify and also the vector search for both the vector stores are using cosine similarity to find the nearest matches. You can choose bigquery vector as that avoids any extra resource like cloudsql.
Vector Embeddings and Search
________
**Q: Why are my example SQLs not being pulled as few-shot examples for the question asked even though the question is almost similar?**
A: Verify if the embedding of the example question has happened successfully.
Check the retrieval SQL used to pull similar SQLs for few-shot examples. If the cosine similarity logic is wrong, that might be the cause of the issue. Correct the SQL so that it pulls the required similarity-based SQLs.
## Accuracy and Latency
**Q: How accurate are the results?**
A: Accuracy depends on the context provided to the agents — the richer the context, the more accurate the results.
Building blocks such as known good SQL examples and SQL validation all help improve accuracy.
________
**Q: How is the latency overall?**
A: Ambiguous questions have increased latency. If latency is a factor, would suggest adding caching layer and reducing validation steps
V2 is also coming up with resolving ambiguity
## Overall Solution
**Q: How do I get started quickly?**
A: The quickest way is to follow the "Quickstart with Open Data QnA: Standalone BigQuery Notebook." It provides a simplified experience using BigQuery. If you need more customization, follow the instructions for setting up the main repository.
________
**Q: Which databases does Open Data QnA currently support?**
A: Currently, it supports Google Cloud SQL for PostgreSQL and Google BigQuery.
________
**Q: What are the requirements to use Open Data QnA?**
A: You'll need:
A Google Cloud Project
An active database (PostgreSQL or BigQuery)
Python 3.9 or higher
Required Python packages (listed in requirements.txt)
________
**Q: Can I customize the behavior of the agents?**
A: Yes, the agents are designed to be modular and extensible. You can modify their code or create your own custom agents.
________
**Q: How do I incorporate my own known good SQL queries into the system?**
A: Follow the setup instructions or use the "3. Loading Known Good SQL Examples" notebook to add your own SQL queries to the vector store. This will improve the accuracy of query generation through RAG.
________
**Q: How do I set the table, column, and example similarity thresholds?**
A: These thresholds are used during the Retrieval-Augmented Generation (RAG) process to determine how similar your query is to the stored embeddings.
Table Similarity Threshold: Determines how closely a user's query needs to match a table name in the vector store to be considered relevant. Higher values make the matching stricter.
Column Similarity Threshold: Similar to the table threshold, but for column names.
Example Similarity Threshold: Controls how closely a user's query needs to match a known good SQL query example to be considered similar.
You can adjust these thresholds when running the pipeline_run function. Start with the default values and experiment to find what works best for your specific data and queries. Generally, start with higher values and gradually decrease them if you're not getting enough relevant results.
________
**Q: Can I visualize the results of my queries?**
A: Yes, the VisualizeAgent can generate JavaScript code for Google Charts to create visualizations of your data.
________
**Q: Are all building blocks mandatory?**
A: No. They can be replaced
________
**Q: Can this be tested against any database?**
A: Tested against Oracle and Snowflake
________
**Q: How are the competitors doing?**
A: Few langchain labs, some experimenting with agents
________
**Q: I created a test colab with langchain and a simple implementation. Why complicate it?**
A: If your environment is not complex, we would suggest to leverage your simplified approach, or look into the [standalone notebook](/notebooks/(standalone)Run_OpenDataQnA.ipynb)
================================================
FILE: docs/repo_structure.md
================================================
Repository Structure
-------------
```
.
├── agents
└── __init__.py
└── core.py
└── BuildSQLAgent.py
└── DebugSQLAgent.py
└── DescriptionAgent.py
└── EmbedderAgent.py
└── ResponseAgent.py
└── ValidateSQLAgent.py
└── VisualizeAgent.py
└── Dockerfile
└── backend-apis
└── __init__.py
└── policy.yaml
└── main.py
└── dbconnectors
└── __init__.py
└── core.py
└── PgConnector.py
└── BQConnector.py
└── docs
└── best_practices.md
└── faq.md
└── repo_structure.md
└── embeddings
└── __init__.py
└── retrieve_embeddings.py
└── store_embeddings.py
└── kgq_embeddings.py
└── frontend
└── notebooks
└── 0_CopyDataToBigQuery.ipynb
└── 0_CopyDataToCloudSqlPG.ipynb
└── 1_Setup_OpenDataQnA.ipynb
└── 2_Run_OpenDataQnA.ipynb
└── 3_LoadKnownGoodSQL.ipynb
└── scripts
└── tables_columns_descriptions.csv
└── copy_select_table_column_bigquery.csv
└── data_source_list.csv
└── known_good_sql.csv
└── save_config.py
└── Scenarios Sample.csv
└── utilities
└── __init__.py
└── prompts.yaml
└── pyproject.toml
└── config.ini
└── env_setup.py
└── opendataqna.py
```
- [`/agents`](/agents): Source code for the LLM Agents.
- [`/backend-apis`](/backend-apis/) : Cloud Run based API deployment for the frontend to demo the solution on a UI
- [`/dbconnectors`](/dbconnectors): Source code for database connectors.
- [`/docs`](/docs): Documentations, including FAQ & Best Practices for using this library.
- [`/embeddings`](/embeddings): Source code for creating and storing embeddings.
- [`/retrieve_embeddings.py`](/embeddings/retrieve_embeddings.py): Source code for retrieving table schema and embedding creation.
- [`/store_embeddings.py`](/embeddings/store_embeddings.py): Source code for storing table schema embeddings in Vector Store.
- [`/kgq_embeddings.py`](/embeddings/kgq_embeddings.py): Source code for loading known good SQLs and creating embeddings in the Vector Store
- [`/frontend`](/frontend) : Angular based frontend code to deploy demo app using the API developed with [`/main.py`](backend-apis/main.py)
- [`/notebooks`](/notebooks): Sample notebooks demonstrating the usage of this library.
- [`/scripts`](/scripts): Additional scripts for initial setup.
- [`/Sample Scenarios.csv`](/scripts/Scenarios%20Sample.csv): Sample Scenarios file that can be used to load them on the frontend UI for demos
- [`/copy_select_table_column_bigquery.py`](/scripts/copy_select_table_column_bigquery.py): Code Sample to copy select tables and columns from one BQ table to another; add table and column descriptions from csv file.
- [`/tables_columns_descriptions.csv`](/scripts/tables_columns_descriptions.csv): CSV file containing table and column names and descriptions to be copied
- [`/known_good_sql.csv`](/scripts/known_good_sql.csv): CSV files
- [`/data_source_list.csv`](/scripts/data_source_list.csv): Data Source CSV File to mention the list of tables and source type etc.
- [`/Dockerfile`](/Dockerfile): Dockerfile for deployment of backend apis. It is placed at the root folder to give it right context and access to the files.
- [`/env_setup.py`](/env_setup.py): Python file for initial setup.
- [`/opendataqna.py`](/opendataqna.py): Python file for running the main pipeline.
- [`/prompts.yaml`](/prompts.yaml): Yaml file that contains the prompts used by the solution. It also provides users the ability to prompt extra context for the use case if any.
================================================
FILE: embeddings/__init__.py
================================================
# Public API of the embeddings package: schema-embedding retrieval/storage
# and Known Good Query (KGQ) embedding helpers.
from .retrieve_embeddings import retrieve_embeddings
from .store_embeddings import store_schema_embeddings
from .kgq_embeddings import store_kgq_embeddings, setup_kgq_table, load_kgq_df
__all__ = ["retrieve_embeddings", "store_schema_embeddings","store_kgq_embeddings", "setup_kgq_table", "load_kgq_df"]
================================================
FILE: embeddings/kgq_embeddings.py
================================================
import os
import asyncio
import asyncpg
import pandas as pd
import numpy as np
from pgvector.asyncpg import register_vector
from google.cloud.sql.connector import Connector
from langchain_community.embeddings import VertexAIEmbeddings
from google.cloud import bigquery
from dbconnectors import pgconnector
from agents import EmbedderAgent
from sqlalchemy.sql import text
from utilities import PROJECT_ID, PG_INSTANCE, PG_DATABASE, PG_USER, PG_PASSWORD, PG_REGION, BQ_OPENDATAQNA_DATASET_NAME, BQ_REGION
# Module-level embedder used throughout this module to create question
# embeddings via Vertex AI ('vertex' mode of EmbedderAgent).
embedder = EmbedderAgent('vertex')
async def setup_kgq_table( project_id,
                           instance_name,
                           database_name,
                           schema,
                           database_user,
                           database_password,
                           region,
                           VECTOR_STORE = "cloudsql-pgvector"):
    """Set up the Vector Store table for Known Good Queries (KGQ).

    Creates (if it does not already exist) the ``example_prompt_sql_embeddings``
    table, either in a BigQuery dataset or in a Cloud SQL for PostgreSQL
    database with a pgvector ``vector(768)`` embedding column.

    Args:
        project_id (str): GCP project hosting the vector store.
        instance_name (str): Cloud SQL instance name (pgvector store only).
        database_name (str): Cloud SQL database name (pgvector store only).
        schema (str): BigQuery dataset name (BigQuery store only).
        database_user (str): Cloud SQL user name (pgvector store only).
        database_password (str): Cloud SQL user password (pgvector store only).
        region (str): Cloud SQL instance region (pgvector store only).
        VECTOR_STORE (str, optional): One of 'bigquery-vector' or
            'cloudsql-pgvector'. Defaults to 'cloudsql-pgvector'.

    Raises:
        ValueError: If VECTOR_STORE is not a supported option.
    """
    if VECTOR_STORE == 'bigquery-vector':
        # Create BQ Client
        client = bigquery.Client(project=project_id)
        # Uncomment to drop and rebuild the table from scratch:
        # client.query_and_wait(f'''DROP TABLE IF EXISTS `{project_id}.{schema}.example_prompt_sql_embeddings`''')
        # Create a new empty table (no-op if it already exists)
        client.query_and_wait(f'''CREATE TABLE IF NOT EXISTS `{project_id}.{schema}.example_prompt_sql_embeddings` (
            user_grouping string NOT NULL, example_user_question string NOT NULL, example_generated_sql string NOT NULL,
            embedding ARRAY<FLOAT64>)''')
    elif VECTOR_STORE == 'cloudsql-pgvector':
        loop = asyncio.get_running_loop()
        async with Connector(loop=loop) as connector:
            # Create connection to the Cloud SQL database
            conn: asyncpg.Connection = await connector.connect_async(
                f"{project_id}:{region}:{instance_name}",  # Cloud SQL instance connection name
                "asyncpg",
                user=f"{database_user}",
                password=f"{database_password}",
                db=f"{database_name}",
            )
            try:
                # Uncomment to drop and rebuild the table from scratch:
                # await conn.execute("DROP TABLE IF EXISTS example_prompt_sql_embeddings")
                # Create a new empty table (no-op if it already exists)
                await conn.execute(
                    """CREATE TABLE IF NOT EXISTS example_prompt_sql_embeddings(
                        user_grouping VARCHAR(1024) NOT NULL,
                        example_user_question text NOT NULL,
                        example_generated_sql text NOT NULL,
                        embedding vector(768))"""
                )
            finally:
                # Fix: the connection was previously never closed, leaking a
                # Cloud SQL connection on every call.
                await conn.close()
    else:
        raise ValueError("Not a valid parameter for a vector store.")
async def store_kgq_embeddings(df_kgq,
project_id,
instance_name,
database_name,
schema,
database_user,
database_password,
region,
VECTOR_STORE = "cloudsql-pgvector"
):
"""
Create and save the Known Good Query Embeddings to Vector Store
"""
if VECTOR_STORE=='bigquery-vector':
client=bigquery.Client(project=project_id)
examp
gitextract_yfzkgoxq/
├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── OWNERS
├── README.md
├── SECURITY.md
├── agents/
│ ├── BuildSQLAgent.py
│ ├── DebugSQLAgent.py
│ ├── DescriptionAgent.py
│ ├── EmbedderAgent.py
│ ├── ResponseAgent.py
│ ├── ValidateSQLAgent.py
│ ├── VisualizeAgent.py
│ ├── __init__.py
│ └── core.py
├── app.py
├── backend-apis/
│ ├── README.md
│ ├── __init__.py
│ ├── main.py
│ └── policy.yaml
├── config.ini
├── dbconnectors/
│ ├── BQConnector.py
│ ├── FirestoreConnector.py
│ ├── PgConnector.py
│ ├── __init__.py
│ └── core.py
├── docs/
│ ├── README.md
│ ├── architecture.md
│ ├── best_practices.md
│ ├── changelog.md
│ ├── config_guide.md
│ ├── faq.md
│ └── repo_structure.md
├── embeddings/
│ ├── __init__.py
│ ├── kgq_embeddings.py
│ ├── retrieve_embeddings.py
│ └── store_embeddings.py
├── env_setup.py
├── frontend/
│ ├── .gitignore
│ ├── README.md
│ ├── angular.json
│ ├── database.indexes.json
│ ├── database.rules.json
│ ├── firebase_setup.json
│ ├── frontend-flutter/
│ │ ├── .flutter-plugins
│ │ ├── .flutter-plugins-dependencies
│ │ ├── Open Data QnA - Working Sheet V2 - sample_questions_UI copy.csv
│ │ ├── Open_Data_QnA_sample_questions_v3 copy.csv
│ │ ├── README.md
│ │ ├── analysis_options.yaml
│ │ ├── android/
│ │ │ ├── .gitignore
│ │ │ ├── app/
│ │ │ │ ├── build.gradle
│ │ │ │ ├── google-services.json
│ │ │ │ └── src/
│ │ │ │ ├── debug/
│ │ │ │ │ └── AndroidManifest.xml
│ │ │ │ ├── main/
│ │ │ │ │ ├── AndroidManifest.xml
│ │ │ │ │ ├── kotlin/
│ │ │ │ │ │ └── com/
│ │ │ │ │ │ └── pilotcap/
│ │ │ │ │ │ └── ttmd/
│ │ │ │ │ │ └── MainActivity.kt
│ │ │ │ │ └── res/
│ │ │ │ │ ├── drawable/
│ │ │ │ │ │ └── launch_background.xml
│ │ │ │ │ ├── drawable-v21/
│ │ │ │ │ │ └── launch_background.xml
│ │ │ │ │ ├── values/
│ │ │ │ │ │ └── styles.xml
│ │ │ │ │ └── values-night/
│ │ │ │ │ └── styles.xml
│ │ │ │ └── profile/
│ │ │ │ └── AndroidManifest.xml
│ │ │ ├── build.gradle
│ │ │ ├── gradle/
│ │ │ │ └── wrapper/
│ │ │ │ └── gradle-wrapper.properties
│ │ │ ├── gradle.properties
│ │ │ ├── nl2sql_oss_android.iml
│ │ │ └── settings.gradle
│ │ ├── build/
│ │ │ └── web/
│ │ │ └── .last_build_id
│ │ ├── ios/
│ │ │ ├── .gitignore
│ │ │ ├── Flutter/
│ │ │ │ ├── AppFrameworkInfo.plist
│ │ │ │ ├── Debug.xcconfig
│ │ │ │ └── Release.xcconfig
│ │ │ ├── Podfile
│ │ │ ├── Runner/
│ │ │ │ ├── AppDelegate.swift
│ │ │ │ ├── Assets.xcassets/
│ │ │ │ │ ├── AppIcon.appiconset/
│ │ │ │ │ │ └── Contents.json
│ │ │ │ │ └── LaunchImage.imageset/
│ │ │ │ │ ├── Contents.json
│ │ │ │ │ └── README.md
│ │ │ │ ├── Base.lproj/
│ │ │ │ │ ├── LaunchScreen.storyboard
│ │ │ │ │ └── Main.storyboard
│ │ │ │ ├── GoogleService-Info.plist
│ │ │ │ ├── Info.plist
│ │ │ │ └── Runner-Bridging-Header.h
│ │ │ ├── Runner.xcodeproj/
│ │ │ │ ├── project.pbxproj
│ │ │ │ ├── project.xcworkspace/
│ │ │ │ │ ├── contents.xcworkspacedata
│ │ │ │ │ └── xcshareddata/
│ │ │ │ │ ├── IDEWorkspaceChecks.plist
│ │ │ │ │ └── WorkspaceSettings.xcsettings
│ │ │ │ └── xcshareddata/
│ │ │ │ └── xcschemes/
│ │ │ │ └── Runner.xcscheme
│ │ │ ├── Runner.xcworkspace/
│ │ │ │ ├── contents.xcworkspacedata
│ │ │ │ └── xcshareddata/
│ │ │ │ ├── IDEWorkspaceChecks.plist
│ │ │ │ └── WorkspaceSettings.xcsettings
│ │ │ └── RunnerTests/
│ │ │ └── RunnerTests.swift
│ │ ├── lib/
│ │ │ ├── firebase_options.dart
│ │ │ ├── main.dart
│ │ │ ├── screens/
│ │ │ │ ├── bot.dart
│ │ │ │ ├── bot_chat_view.dart
│ │ │ │ ├── disclaimer.dart
│ │ │ │ └── settings.dart
│ │ │ ├── services/
│ │ │ │ ├── display_stepper/
│ │ │ │ │ ├── display_stepper_cubit.dart
│ │ │ │ │ └── display_stepper_state.dart
│ │ │ │ ├── first_question/
│ │ │ │ │ ├── first_question_cubit.dart
│ │ │ │ │ └── first_question_state.dart
│ │ │ │ ├── load_question/
│ │ │ │ │ ├── load_question_cubit.dart
│ │ │ │ │ └── load_question_state.dart
│ │ │ │ ├── new_suggestions/
│ │ │ │ │ ├── new_suggestion_cubit.dart
│ │ │ │ │ └── new_suggestion_state.dart
│ │ │ │ ├── text_to_doc_question/
│ │ │ │ │ ├── text_to_doc_question_cubit.dart
│ │ │ │ │ └── text_to_doc_question_state.dart
│ │ │ │ ├── update_expert_mode/
│ │ │ │ │ ├── update_expert_mode_cubit.dart
│ │ │ │ │ └── update_expert_mode_state.dart
│ │ │ │ ├── update_popular_questions/
│ │ │ │ │ ├── update_popular_questions_cubit.dart
│ │ │ │ │ └── update_popular_questions_state.dart
│ │ │ │ └── update_stepper/
│ │ │ │ ├── update_stepper_cubit.dart
│ │ │ │ └── update_stepper_state.dart
│ │ │ └── utils/
│ │ │ ├── Input_custom.dart
│ │ │ ├── TextToDocParameter.dart
│ │ │ ├── custom_input_field.dart
│ │ │ ├── most_popular_questions.dart
│ │ │ ├── pdf_viewer.dart
│ │ │ ├── stepper_expert_info.dart
│ │ │ └── tabbed_container.dart
│ │ ├── nl2sql_oss.iml
│ │ ├── pubspec.yaml
│ │ ├── test/
│ │ │ └── widget_test.dart
│ │ └── web/
│ │ ├── index 01.49.28.html
│ │ ├── index.html
│ │ └── manifest.json
│ ├── frontend.yaml
│ ├── package.json
│ ├── server.ts
│ ├── src/
│ │ ├── app/
│ │ │ ├── agent-chat/
│ │ │ │ ├── agent-chat.component.html
│ │ │ │ ├── agent-chat.component.scss
│ │ │ │ ├── agent-chat.component.spec.ts
│ │ │ │ └── agent-chat.component.ts
│ │ │ ├── app-routing.module.ts
│ │ │ ├── app.component.html
│ │ │ ├── app.component.scss
│ │ │ ├── app.component.spec.ts
│ │ │ ├── app.component.ts
│ │ │ ├── app.module.server.ts
│ │ │ ├── app.module.ts
│ │ │ ├── business-user/
│ │ │ │ ├── business-user.component.html
│ │ │ │ ├── business-user.component.scss
│ │ │ │ ├── business-user.component.spec.ts
│ │ │ │ └── business-user.component.ts
│ │ │ ├── grouping-modal/
│ │ │ │ ├── grouping-modal.component.html
│ │ │ │ ├── grouping-modal.component.scss
│ │ │ │ ├── grouping-modal.component.spec.ts
│ │ │ │ └── grouping-modal.component.ts
│ │ │ ├── header/
│ │ │ │ ├── header.component.html
│ │ │ │ ├── header.component.scss
│ │ │ │ ├── header.component.spec.ts
│ │ │ │ └── header.component.ts
│ │ │ ├── home/
│ │ │ │ ├── home.component.html
│ │ │ │ ├── home.component.scss
│ │ │ │ ├── home.component.spec.ts
│ │ │ │ └── home.component.ts
│ │ │ ├── http.interceptor.ts
│ │ │ ├── login/
│ │ │ │ ├── login.component.html
│ │ │ │ ├── login.component.scss
│ │ │ │ ├── login.component.spec.ts
│ │ │ │ └── login.component.ts
│ │ │ ├── login-button/
│ │ │ │ ├── login-button.component.html
│ │ │ │ ├── login-button.component.scss
│ │ │ │ ├── login-button.component.spec.ts
│ │ │ │ └── login-button.component.ts
│ │ │ ├── menu/
│ │ │ │ ├── menu.component.html
│ │ │ │ ├── menu.component.scss
│ │ │ │ ├── menu.component.spec.ts
│ │ │ │ └── menu.component.ts
│ │ │ ├── prism/
│ │ │ │ ├── prism.component.html
│ │ │ │ ├── prism.component.scss
│ │ │ │ ├── prism.component.spec.ts
│ │ │ │ ├── prism.component.ts
│ │ │ │ └── prism.d.ts
│ │ │ ├── scenario-list/
│ │ │ │ ├── scenario-list.component.html
│ │ │ │ ├── scenario-list.component.scss
│ │ │ │ ├── scenario-list.component.spec.ts
│ │ │ │ └── scenario-list.component.ts
│ │ │ ├── shared/
│ │ │ │ └── services/
│ │ │ │ ├── chat.service.spec.ts
│ │ │ │ ├── chat.service.ts
│ │ │ │ ├── home.service.spec.ts
│ │ │ │ ├── home.service.ts
│ │ │ │ ├── login.service.spec.ts
│ │ │ │ ├── login.service.ts
│ │ │ │ ├── shared.service.spec.ts
│ │ │ │ └── shared.service.ts
│ │ │ ├── upload-template/
│ │ │ │ ├── upload-template.component.html
│ │ │ │ ├── upload-template.component.scss
│ │ │ │ ├── upload-template.component.spec.ts
│ │ │ │ └── upload-template.component.ts
│ │ │ ├── user-journey/
│ │ │ │ ├── user-journey.component.html
│ │ │ │ ├── user-journey.component.scss
│ │ │ │ ├── user-journey.component.spec.ts
│ │ │ │ └── user-journey.component.ts
│ │ │ └── user-photo/
│ │ │ ├── user-photo.component.html
│ │ │ ├── user-photo.component.scss
│ │ │ ├── user-photo.component.spec.ts
│ │ │ └── user-photo.component.ts
│ │ ├── assets/
│ │ │ ├── .gitkeep
│ │ │ └── constants.ts
│ │ ├── index.html
│ │ ├── main.server.ts
│ │ ├── main.ts
│ │ ├── styles/
│ │ │ └── variables.scss
│ │ └── styles.scss
│ ├── tsconfig.app.json
│ ├── tsconfig.json
│ └── tsconfig.spec.json
├── notebooks/
│ ├── 0_CopyDataToBigQuery.ipynb
│ ├── 0_CopyDataToCloudSqlPG.ipynb
│ ├── 1_Setup_OpenDataQnA.ipynb
│ ├── 2_Run_OpenDataQnA.ipynb
│ └── 3_LoadKnownGoodSQL.ipynb
├── opendataqna.py
├── prompts.yaml
├── pyproject.toml
├── scripts/
│ ├── .~lock.Scenarios Sample.csv#
│ ├── Scenarios Sample.csv
│ ├── __init__.py
│ ├── copy_select_table_column_bigquery.py
│ ├── data_source_list.csv
│ ├── data_source_list_sample.csv
│ ├── known_good_sql.csv
│ ├── save_config.py
│ └── tables_columns_descriptions.csv
├── terraform/
│ ├── .gitignore
│ ├── README.md
│ ├── backend.tf
│ ├── bq.tf
│ ├── embeddings-setup.tf
│ ├── frontend.tf
│ ├── iam.tf
│ ├── locals.tf
│ ├── main.tf
│ ├── outputs.tf
│ ├── pg-vector.tf
│ ├── scripts/
│ │ ├── backend-deployment.sh
│ │ ├── copy-firebase-json.sh
│ │ ├── create-and-store-embeddings.py
│ │ ├── deploy-all.sh
│ │ ├── execute-gcloud-cmd.sh
│ │ ├── execute-python-files.sh
│ │ ├── frontend-deployment.sh
│ │ └── install-dependencies.sh
│ ├── templates/
│ │ ├── config.ini.tftpl
│ │ └── constants.ts.tftpl
│ ├── terraform.tfvars.sample
│ ├── variables.tf
│ └── versions.tf
└── utilities/
├── __init__.py
└── imgs/
└── aa
SYMBOL INDEX (457 symbols across 77 files)
FILE: agents/BuildSQLAgent.py
class BuildSQLAgent (line 18) | class BuildSQLAgent(Agent, ABC):
method __init__ (line 22) | def __init__(self, model_id = 'gemini-1.5-pro'):
method build_sql (line 26) | def build_sql(self,source_type,user_grouping, user_question,session_hi...
method rewrite_question (line 135) | def rewrite_question(self,question,session_history):
method get_last_sql (line 171) | def get_last_sql(self,session_history):
FILE: agents/DebugSQLAgent.py
class DebugSQLAgent (line 17) | class DebugSQLAgent(Agent, ABC):
method __init__ (line 78) | def __init__(self, model_id = 'gemini-1.5-pro'):
method init_chat (line 82) | def init_chat(self,source_type,user_grouping, tables_schema,columns_sc...
method rewrite_sql_chat (line 114) | def rewrite_sql_chat(self, chat_session, sql, question, error_df):
method start_debugger (line 151) | def start_debugger (self,
FILE: agents/DescriptionAgent.py
class DescriptionAgent (line 5) | class DescriptionAgent(Agent, ABC):
method generate_llm_response (line 41) | def generate_llm_response(self,prompt):
method generate_missing_descriptions (line 46) | def generate_missing_descriptions(self,source,table_desc_df, column_na...
FILE: agents/EmbedderAgent.py
class EmbedderAgent (line 7) | class EmbedderAgent(Agent, ABC):
method __init__ (line 38) | def __init__(self, mode, embeddings_model='text-embedding-004'):
method create (line 52) | def create(self, question):
FILE: agents/ResponseAgent.py
class ResponseAgent (line 12) | class ResponseAgent(Agent, ABC):
method run (line 36) | def run(self, user_question, sql_result):
FILE: agents/ValidateSQLAgent.py
class ValidateSQLAgent (line 8) | class ValidateSQLAgent(Agent, ABC):
method check (line 36) | def check(self,source_type, user_question, tables_schema, columns_sche...
FILE: agents/VisualizeAgent.py
class VisualizeAgent (line 20) | class VisualizeAgent(Agent, ABC):
method __init__ (line 70) | def __init__(self):
method getChartType (line 74) | def getChartType(self,user_question, generated_sql):
method getChartPrompt (line 87) | def getChartPrompt(self,user_question, generated_sql, chart_type, char...
method generate_charts (line 99) | def generate_charts(self,user_question,generated_sql,sql_results):
FILE: agents/core.py
class Agent (line 21) | class Agent(ABC):
method __init__ (line 28) | def __init__(self,
method generate_llm_response (line 84) | def generate_llm_response(self,prompt):
method rewrite_question (line 89) | def rewrite_question(self,question,session_history):
FILE: app.py
function get_known_databases (line 19) | def get_known_databases():
function get_known_sql (line 35) | def get_known_sql(selected_schema):
function generate_sql_results (line 52) | def generate_sql_results(selected_schema,user_question):
function generate_response (line 93) | def generate_response(prompt):
FILE: backend-apis/main.py
function jwt_authenticated (line 46) | def jwt_authenticated(func: Callable[..., int]) -> Callable[..., int]:
function getBDList (line 90) | def getBDList():
function embedSql (line 114) | async def embedSql():
function getSQLResult (line 147) | def getSQLResult():
function getKnownSQL (line 195) | def getKnownSQL():
function generateSQL (line 224) | async def generateSQL():
function generateViz (line 272) | async def generateViz():
function getSummary (line 316) | async def getSummary():
function getNaturalResponse (line 345) | async def getNaturalResponse():
function getResultsResponse (line 409) | async def getResultsResponse():
FILE: dbconnectors/BQConnector.py
function get_auth_user (line 13) | def get_auth_user():
function bq_specific_data_types (line 21) | def bq_specific_data_types():
class BQConnector (line 52) | class BQConnector(DBConnector, ABC):
method __init__ (line 102) | def __init__(self,
method getconn (line 114) | def getconn(self):
method retrieve_df (line 118) | def retrieve_df(self,query):
method make_audit_entry (line 121) | def make_audit_entry(self, source_type, user_grouping, model, question...
method create_vertex_connection (line 212) | def create_vertex_connection(self, connection_id : str):
method create_embedding_model (line 220) | def create_embedding_model(self,connection_id: str, embedding_model: s...
method retrieve_matches (line 227) | def retrieve_matches(self, mode, user_grouping, qe, similarity_thresho...
method getSimilarMatches (line 286) | def getSimilarMatches(self, mode, user_grouping, qe, num_matches, simi...
method getExactMatches (line 306) | def getExactMatches(self, query):
method test_sql_plan_execution (line 332) | def test_sql_plan_execution(self, generated_sql):
method return_table_schema_sql (line 346) | def return_table_schema_sql(self, dataset, table_names=None):
method return_column_schema_sql (line 400) | def return_column_schema_sql(self, dataset, table_names=None):
method get_column_samples (line 451) | def get_column_samples(self,columns_df):
FILE: dbconnectors/FirestoreConnector.py
function create_unique_id (line 8) | def create_unique_id():
class FirestoreConnector (line 18) | class FirestoreConnector(DBConnector, ABC):
method __init__ (line 19) | def __init__(self,
method log_chat (line 25) | def log_chat(self,session_id, user_question, bot_response,user_id="TES...
method get_chat_logs_for_session (line 44) | def get_chat_logs_for_session(self,session_id):
FILE: dbconnectors/PgConnector.py
function pg_specific_data_types (line 21) | def pg_specific_data_types():
class PgConnector (line 70) | class PgConnector(DBConnector, ABC):
method __init__ (line 115) | def __init__(self,
method getconn (line 136) | def getconn(self):
method retrieve_df (line 153) | def retrieve_df(self, query):
method cache_known_sql (line 173) | async def cache_known_sql(self):
method retrieve_matches (line 213) | async def retrieve_matches(self, mode, user_groupinguping, qe, similar...
method getSimilarMatches (line 318) | async def getSimilarMatches(self, mode, user_grouping, qe, num_matches...
method test_sql_plan_execution (line 338) | def test_sql_plan_execution(self, generated_sql):
method getExactMatches (line 363) | def getExactMatches(self, query):
method return_column_schema_sql (line 395) | def return_column_schema_sql(self, schema, table_names=None):
method return_table_schema_sql (line 475) | def return_table_schema_sql(self, schema, table_names=None):
method get_column_samples (line 516) | def get_column_samples(self,columns_df):
FILE: dbconnectors/core.py
class DBConnector (line 9) | class DBConnector(ABC):
method __init__ (line 16) | def __init__(self,
FILE: embeddings/kgq_embeddings.py
function setup_kgq_table (line 18) | async def setup_kgq_table( project_id,
function store_kgq_embeddings (line 68) | async def store_kgq_embeddings(df_kgq,
function load_kgq_df (line 165) | def load_kgq_df():
FILE: embeddings/retrieve_embeddings.py
function get_embedding_chunked (line 14) | def get_embedding_chunked(textinput, batch_size):
function retrieve_embeddings (line 28) | def retrieve_embeddings(SOURCE, SCHEMA="public", table_names = None):
FILE: embeddings/store_embeddings.py
function store_schema_embeddings (line 16) | async def store_schema_embeddings(table_details_embeddings,
function add_sql_embedding (line 181) | async def add_sql_embedding(user_question, generated_sql, database):
FILE: env_setup.py
function setup_postgresql (line 22) | def setup_postgresql(pg_instance, pg_region, pg_database, pg_user, pg_pa...
function create_vector_store (line 79) | def create_vector_store():
function get_embeddings (line 162) | def get_embeddings():
function store_embeddings (line 269) | async def store_embeddings(table_schema_embeddings, col_schema_embeddings):
function create_kgq_sql_table (line 323) | async def create_kgq_sql_table():
function store_kgq_sql_embeddings (line 376) | async def store_kgq_sql_embeddings():
function create_firestore_db (line 439) | def create_firestore_db(firestore_region=FIRESTORE_REGION,firestore_data...
FILE: frontend/frontend-flutter/lib/firebase_options.dart
class DefaultFirebaseOptions (line 17) | class DefaultFirebaseOptions {
FILE: frontend/frontend-flutter/lib/main.dart
function main (line 61) | void main()
class ttmd (line 91) | class ttmd extends StatelessWidget {
method build (line 96) | Widget build(BuildContext context)
class ContentTtmd (line 192) | class ContentTtmd extends StatefulWidget {
method createState (line 198) | State<ContentTtmd> createState()
class _ContentTtmdState (line 201) | class _ContentTtmdState extends State<ContentTtmd> {
method CreateNodesMostPopularQuestion (line 247) | List<TreeNode<String>> CreateNodesMostPopularQuestion(
method _addData (line 263) | void _addData(String data, bool isHistory)
method _nodeSelected (line 340) | void _nodeSelected(context, nodeValue)
method _nodeSelected1 (line 384) | void _nodeSelected1(context, nodeValue)
method _nodeBuilder (line 393) | Widget _nodeBuilder(context, nodeValue)
method _nodeBuilder2 (line 402) | Widget _nodeBuilder2(context, nodeValue)
method _nodeBuilder1 (line 460) | Widget _nodeBuilder1(context, nodeValue)
method _nodeBuilder3 (line 484) | Widget _nodeBuilder3(context, nodeValue)
method _createSideMenu (line 514) | FutureBuilder<List<List<dynamic>>?> _createSideMenu()
method initState (line 803) | void initState()
method setup (line 808) | Future<void> setup()
method initializeFirestore (line 824) | Future<void> initializeFirestore()
method loadQuestionsFromFirestore (line 829) | Future<List<List<dynamic>>?> loadQuestionsFromFirestore()
method loadCfgFromFirestore (line 878) | Future<void> loadCfgFromFirestore()
method _getUserGrouping (line 950) | Future<List<String>> _getUserGrouping()
method _getQuestions (line 1010) | Future<List<Map<String, dynamic>>> _getQuestions()
method build (line 1017) | Widget build(BuildContext context)
method _onItemTapped (line 1362) | void _onItemTapped(int index)
method _createPDF (line 1368) | Future<List<int>> _createPDF()
method displayDateTime (line 1376) | String displayDateTime()
method createCards (line 1384) | Widget createCards(
method makeSuggestions (line 1534) | Padding makeSuggestions(
method createCardsSuggestion (line 1717) | Widget createCardsSuggestion(
method _createDebugInfoCard (line 1853) | Widget _createDebugInfoCard(Widget leading, String title, String subti...
method _buildStepper (line 1922) | Container _buildStepper()
method setStepsStates (line 1952) | void setStepsStates(UpdateStepperState state)
method _dialogRequestGenerated (line 2089) | Future<void> _dialogRequestGenerated(
method loadImageAsset (line 2359) | Future<ui.Image> loadImageAsset(String assetPath)
method convertImageToListInt (line 2367) | Future<List<int>> convertImageToListInt(String assetPath)
method _loadFile (line 2373) | Future<Uint8List> _loadFile(String uri)
method importQuestions (line 2381) | Future<List<List<dynamic>>> importQuestions()
method checkImportedCSVFile (line 2438) | void checkImportedCSVFile()
method noCfgStoredinFirestore (line 2466) | void noCfgStoredinFirestore()
method createQuestionList (line 2575) | List<TreeNode<String>> createQuestionList(List<List<dynamic>> question...
method getQuestionCount (line 2755) | int getQuestionCount(List<List<dynamic>> questionList, String scenario)
method _getLastQuestions (line 2775) | Future<List<Map<String, dynamic>>> _getLastQuestions()
method _getLastQuestionsOld (line 2812) | Future<List<String>> _getLastQuestionsOld(String userGrouping)
method SaveImportedQuestionsToFirestore (line 2914) | void SaveImportedQuestionsToFirestore(
class Config (line 2965) | class Config {
FILE: frontend/frontend-flutter/lib/screens/bot.dart
function randomString (line 33) | String randomString()
class Bot (line 39) | class Bot extends StatefulWidget {
method createState (line 45) | State<Bot> createState()
class BotState (line 48) | class BotState extends State<Bot> with SingleTickerProviderStateMixin {
method initState (line 98) | void initState()
method dispose (line 104) | void dispose()
method setup (line 110) | Future<void> setup()
method build (line 115) | Widget build(BuildContext context)
method dummyFunction (line 179) | Text dummyFunction()
method generateSnapshot (line 188) | Future<void> generateSnapshot()
method _addMessage (line 226) | void _addMessage(types.Message message)
method _handleSendPressed (line 237) | void _handleSendPressed(types.PartialText message)
method _handleReceivedResponse (line 290) | void _handleReceivedResponse(String msg, String type)
method getChatResponseTextToDoCStream (line 426) | Future<List<Object>?> getChatResponseTextToDoCStream(
method waitForFirstSSEData (line 481) | Future<String> waitForFirstSSEData(
method getChatResponseTextToDoC (line 533) | Future<List<Object>?> getChatResponseTextToDoC(
method ShowCapturedWidget (line 633) | Future<dynamic> ShowCapturedWidget(
method transfromDynamicListToStringList (line 648) | List<String> transfromDynamicListToStringList(List<dynamic> list)
method _handleFileSelection (line 668) | void _handleFileSelection()
method _handleMessageTap (line 700) | void _handleMessageTap(BuildContext _, types.Message message)
method _handlePreviewDataFetched (line 708) | void _handlePreviewDataFetched(
method _handleAttachmentPressed (line 722) | void _handleAttachmentPressed()
method _bubbleBuilder (line 764) | Widget _bubbleBuilder(
method customMessageBuilder (line 785) | Widget customMessageBuilder(types.CustomMessage customMessage,
method getGoogleGraph (line 1007) | Widget getGoogleGraph(dynamic dataViz)
method getGoogleTable (line 1052) | Widget getGoogleTable(dynamic dataViz)
method _generateImage (line 1089) | Future<Uint8List> _generateImage(Widget widget)
method showFeedbackDialog (line 1103) | void showFeedbackDialog()
method copyGraphToClipBoard (line 1181) | Future<void> copyGraphToClipBoard(String imageKey, String summaryText)
method getChatResponseNew (line 1214) | Future<List<Object>?> getChatResponseNew(String msg, String mime, Stri...
method extractContentGenerateSQL (line 1398) | String extractContentGenerateSQL(String jsonResponse)
method extractContentResultsOpenDataQnA (line 1421) | Future<List<Object>> extractContentResultsOpenDataQnA(
method getDataVisualization (line 1534) | Future<dynamic> getDataVisualization(
method setBubbleColor (line 1627) | List<String> setBubbleColor(String text, {types.User? user})
method countOccurences (line 1664) | int countOccurences(String mainString, String search)
method _dialogExtension (line 1677) | Future<void> _dialogExtension(BuildContext context, String extension)
method displayDateTime (line 1707) | String displayDateTime()
method avatarBuilder (line 1727) | Widget avatarBuilder(types.User user)
method anonymizedData (line 1743) | String anonymizedData(String responseRunQuery)
method generateRandomString (line 1820) | String generateRandomString(int length)
method _updateUserGroupingInSessionLogs (line 1829) | Future<void> _updateUserGroupingInSessionLogs()
method createPaginatedTable (line 1877) | PaginatedDataTable? createPaginatedTable(String data)
class OpenDataQnASource (line 1929) | class OpenDataQnASource extends DataTableSource {
method getRow (line 1938) | DataRow? getRow(int index)
FILE: frontend/frontend-flutter/lib/screens/bot_chat_view.dart
class BotChatView (line 5) | class BotChatView extends StatefulWidget {
method createState (line 9) | State<BotChatView> createState()
class BotChatViewState (line 12) | class BotChatViewState extends State<BotChatView> {
method initState (line 19) | void initState()
method build (line 59) | Widget build(BuildContext context)
method onSendTap (line 83) | void onSendTap(String message, ReplyMessage replyMessage, MessageType ...
FILE: frontend/frontend-flutter/lib/screens/disclaimer.dart
class Disclaimer (line 10) | class Disclaimer extends StatefulWidget {
method createState (line 16) | State<Disclaimer> createState()
class DisclaimerState (line 19) | class DisclaimerState extends State<Disclaimer> {
method build (line 26) | Widget build(BuildContext context)
method signInWithGoogle (line 306) | Future<UserCredential> signInWithGoogle()
method firebaseAuthentication (line 320) | Future<String?> firebaseAuthentication()
FILE: frontend/frontend-flutter/lib/screens/settings.dart
class Settings (line 11) | class Settings extends StatefulWidget {
method createState (line 31) | State<Settings> createState()
class SettingsState (line 34) | class SettingsState extends State<Settings> {
method build (line 36) | Widget build(BuildContext context)
method importFrontEndCfgFile (line 281) | void importFrontEndCfgFile()
method showSuccessfulUploadMsg (line 361) | void showSuccessfulUploadMsg()
method displayCfgUploadErrorMsg (line 387) | void displayCfgUploadErrorMsg()
method updateFrontEndFlutterCfg (line 437) | void updateFrontEndFlutterCfg({required String parameter, required boo...
class Config (line 467) | class Config {
method toString (line 477) | String toString()
FILE: frontend/frontend-flutter/lib/services/display_stepper/display_stepper_cubit.dart
class DisplayStepperCubit (line 8) | class DisplayStepperCubit extends Cubit<DisplayStepperState> {
method displayStepper (line 14) | Future<void> displayStepper(bool isDisplay)
FILE: frontend/frontend-flutter/lib/services/display_stepper/display_stepper_state.dart
type displayStepperStatus (line 4) | enum displayStepperStatus {display_stepper,remove_stepper}
class DisplayStepperState (line 6) | class DisplayStepperState extends Equatable {
method copyWith (line 16) | DisplayStepperState copyWith({
FILE: frontend/frontend-flutter/lib/services/first_question/first_question_cubit.dart
class FirstQuestionCubit (line 5) | class FirstQuestionCubit extends Cubit<FirstQuestionState> {
method removeWelcomeMessage (line 11) | Future<void> removeWelcomeMessage({String? message = ""})
FILE: frontend/frontend-flutter/lib/services/first_question/first_question_state.dart
type firstQuestionStatus (line 4) | enum firstQuestionStatus {display_welcome_message,remove_welcome_message}
class FirstQuestionState (line 6) | class FirstQuestionState extends Equatable {
method copyWith (line 18) | FirstQuestionState copyWith({
FILE: frontend/frontend-flutter/lib/services/load_question/load_question_cubit.dart
class LoadQuestionCubit (line 7) | class LoadQuestionCubit extends Cubit<LoadQuestionState> {
method loadQuestionToChat (line 13) | Future<void> loadQuestionToChat({String? question, String ? time})
FILE: frontend/frontend-flutter/lib/services/load_question/load_question_state.dart
type LoadQuestionStatus (line 3) | enum LoadQuestionStatus {initial,loaded, error}
class LoadQuestionState (line 5) | class LoadQuestionState extends Equatable {
method copyWith (line 19) | LoadQuestionState copyWith({
FILE: frontend/frontend-flutter/lib/services/new_suggestions/new_suggestion_cubit.dart
class NewSuggestionCubit (line 10) | class NewSuggestionCubit extends Cubit<NewSuggestionState> {
method generateNewSuggestions (line 18) | Future<void> generateNewSuggestions(int scenarioNumber, String question,
method getAllquestions (line 199) | Future<void> getAllquestions(String userGrouping)
method displayDateTime (line 268) | String displayDateTime()
method pickUpNextQuestions (line 276) | List<String> pickUpNextQuestions(int scenarioNumber, String question)
method getIndexOfQuestion (line 281) | int getIndexOfQuestion(
method pickUpRandomQuestion (line 296) | List<String> pickUpRandomQuestion(
FILE: frontend/frontend-flutter/lib/services/new_suggestions/new_suggestion_state.dart
type NewSuggestionStateStatus (line 4) | enum NewSuggestionStateStatus {initial,loading,loaded, all_questions_loa...
class NewSuggestionState (line 6) | class NewSuggestionState extends Equatable {
method copyWith (line 24) | NewSuggestionState copyWith({
FILE: frontend/frontend-flutter/lib/services/text_to_doc_question/text_to_doc_question_cubit.dart
class TextToDocQuestionCubit (line 5) | class TextToDocQuestionCubit extends Cubit<TextToDocQuestionState> {
method switchToTextToDoc (line 11) | Future<void> switchToTextToDoc({required bool isTextToDoc, String? mes...
FILE: frontend/frontend-flutter/lib/services/text_to_doc_question/text_to_doc_question_state.dart
type textToDocStatus (line 3) | enum textToDocStatus {not_text_to_doc,text_to_doc}
class TextToDocQuestionState (line 5) | class TextToDocQuestionState extends Equatable {
method copyWith (line 17) | TextToDocQuestionState copyWith({
FILE: frontend/frontend-flutter/lib/services/update_expert_mode/update_expert_mode_cubit.dart
class UpdateExpertModeCubit (line 7) | class UpdateExpertModeCubit extends Cubit<UpdateExpertModeState> {
method updateExpertMode (line 13) | Future<void> updateExpertMode(bool isDisplay)
FILE: frontend/frontend-flutter/lib/services/update_expert_mode/update_expert_mode_state.dart
type updateExpertModeStatus (line 4) | enum updateExpertModeStatus {expert_mode_on,expert_mode_off}
class UpdateExpertModeState (line 6) | class UpdateExpertModeState extends Equatable {
method copyWith (line 16) | UpdateExpertModeState copyWith({
FILE: frontend/frontend-flutter/lib/services/update_popular_questions/update_popular_questions_cubit.dart
class UpdatePopularQuestionsCubit (line 6) | class UpdatePopularQuestionsCubit extends Cubit<UpdatePopularQuestionsSt...
method updateMostPopularQuestions (line 17) | Future<void> updateMostPopularQuestions(
FILE: frontend/frontend-flutter/lib/services/update_popular_questions/update_popular_questions_state.dart
type UpdateMostPopularQuestionStatus (line 6) | enum UpdateMostPopularQuestionStatus {initial,loaded}
class UpdatePopularQuestionsState (line 8) | class UpdatePopularQuestionsState extends Equatable {
method copyWith (line 22) | UpdatePopularQuestionsState copyWith({
FILE: frontend/frontend-flutter/lib/services/update_stepper/update_stepper_cubit.dart
class UpdateStepperCubit (line 9) | class UpdateStepperCubit extends Cubit<UpdateStepperState> {
method updateStepperStatusUploaded (line 23) | Future<void> updateStepperStatusUploaded(
FILE: frontend/frontend-flutter/lib/services/update_stepper/update_stepper_state.dart
type StepperStatus (line 6) | enum StepperStatus {
class UpdateStepperState (line 20) | class UpdateStepperState extends Equatable {
method copyWith (line 43) | UpdateStepperState copyWith(
FILE: frontend/frontend-flutter/lib/utils/Input_custom.dart
class InputCustom (line 5) | class InputCustom extends Input {
FILE: frontend/frontend-flutter/lib/utils/TextToDocParameter.dart
class TextToDocParameter (line 1) | class TextToDocParameter {
FILE: frontend/frontend-flutter/lib/utils/custom_input_field.dart
class CustomInputField (line 25) | class CustomInputField extends StatefulWidget {
method createState (line 54) | State<CustomInputField> createState()
class _InputState (line 58) | class _InputState extends State<CustomInputField> {
method initState (line 86) | void initState()
method _handleSendButtonVisibilityModeChange (line 94) | void _handleSendButtonVisibilityModeChange()
method _handleSendPressed (line 108) | void _handleSendPressed()
method _handleTextControllerChange (line 122) | void _handleTextControllerChange()
method _inputBuilder (line 131) | Widget _inputBuilder()
method loadCfgFromFirestore (line 281) | Future<void> loadCfgFromFirestore()
method getAllquestions (line 343) | Future<List<Suggestion>> getAllquestions()
method getAllquestionsFromUserGroup (line 368) | Future<List<String>> getAllquestionsFromUserGroup(String userGrouping)
method _getUserGrouping (line 428) | Future<List<String>> _getUserGrouping()
method didUpdateWidget (line 487) | void didUpdateWidget(covariant CustomInputField oldWidget)
method dispose (line 496) | void dispose()
method build (line 503) | Widget build(BuildContext context)
method suggestionsCallback (line 511) | Future<List<Suggestion>> suggestionsCallback(String pattern)
method suggestionsEmptyCallback (line 521) | Future<List<Suggestion>> suggestionsEmptyCallback(String pattern)
class InputOptions (line 536) | @immutable
class Suggestion (line 589) | class Suggestion {
FILE: frontend/frontend-flutter/lib/utils/most_popular_questions.dart
class MostPopularQ (line 1) | class MostPopularQ {
method toString (line 8) | String toString()
FILE: frontend/frontend-flutter/lib/utils/pdf_viewer.dart
class PdfViewer (line 8) | class PdfViewer extends StatefulWidget {
method createState (line 12) | _PdfViewerState createState()
class _PdfViewerState (line 15) | class _PdfViewerState extends State<PdfViewer> {
method build (line 24) | Widget build(BuildContext context)
FILE: frontend/frontend-flutter/lib/utils/stepper_expert_info.dart
class StepperExpertInfo (line 1) | class StepperExpertInfo {
FILE: frontend/frontend-flutter/lib/utils/tabbed_container.dart
class TabbedContainer (line 4) | class TabbedContainer extends StatefulWidget {
method createState (line 19) | _TabbedContainerState createState()
class _TabbedContainerState (line 22) | class _TabbedContainerState extends State<TabbedContainer>
method initState (line 27) | void initState()
method build (line 39) | Widget build(BuildContext context)
FILE: frontend/frontend-flutter/test/widget_test.dart
function main (line 13) | void main()
FILE: frontend/server.ts
function app (line 9) | function app(): express.Express {
function run (line 46) | function run(): void {
FILE: frontend/src/app/agent-chat/agent-chat.component.ts
class AgentChatComponent (line 14) | class AgentChatComponent implements AfterViewInit {
method constructor (line 39) | constructor(public homeService: HomeService, private snackBar: MatSnac...
method ngOnInit (line 40) | ngOnInit() {
method ngAfterContentChecked (line 57) | ngAfterContentChecked() {
method ngAfterViewInit (line 61) | ngAfterViewInit() {
method getResultforSql (line 66) | getResultforSql() {
method tabClick (line 100) | async tabClick(event: any, displayedColumns: any) {
method visualizeBtn (line 108) | visualizeBtn(msg: any, ind: any) {
method onChange (line 123) | onChange(value: any, ind: any, key: any) {
method thumbsUp (line 129) | thumbsUp(sql: any, ind: any) {
method updateLocalMessage (line 154) | updateLocalMessage(ind: any, res: any, data: any) {
method showSnackbarCssStyles (line 180) | showSnackbarCssStyles(content: any, action: any, duration: any) {
method showContentCopiedMsg (line 190) | showContentCopiedMsg() {
method closeFeedback (line 194) | closeFeedback() {
method thumbsDown (line 198) | thumbsDown() {
method submitFeedback (line 202) | submitFeedback(ind: any, comment: any) {
method feedbackOption (line 213) | feedbackOption(val: any) {
method ngOnDestroy (line 223) | ngOnDestroy() {
FILE: frontend/src/app/app-routing.module.ts
class AppRoutingModule (line 19) | class AppRoutingModule { }
FILE: frontend/src/app/app.component.ts
class AppComponent (line 8) | class AppComponent {
method constructor (line 11) | constructor() {}
method ngOnInit (line 12) | ngOnInit() {}
FILE: frontend/src/app/app.module.server.ts
class AppServerModule (line 13) | class AppServerModule {}
FILE: frontend/src/app/app.module.ts
class AppModule (line 145) | class AppModule { }
FILE: frontend/src/app/business-user/business-user.component.ts
type Tabledata (line 12) | interface Tabledata {
class BusinessUserComponent (line 21) | class BusinessUserComponent {
method constructor (line 53) | constructor(public loginService: LoginService, public homeService: Hom...
method ngOnChanges (line 65) | ngOnChanges(changes: SimpleChanges) {
method ngOnInit (line 80) | ngOnInit() {
method reloadComponent (line 84) | reloadComponent(self: boolean, urlToNavigateTo?: string) {
method loadInitialChat (line 95) | loadInitialChat() {
method followUp (line 106) | followUp(query: any, event?: any) {
method suggestionResult (line 132) | suggestionResult(selectedsql: any) {
method showSnackbarCssStyles (line 142) | showSnackbarCssStyles(content: any, action: any, duration: any) {
method updateStyleItem (line 151) | updateStyleItem(value: boolean) {
method showContentCopiedMsg (line 154) | showContentCopiedMsg() {
method ngOnDestroy (line 158) | ngOnDestroy() {
FILE: frontend/src/app/grouping-modal/grouping-modal.component.ts
class GroupingModalComponent (line 11) | class GroupingModalComponent {
method constructor (line 12) | constructor(public dialogRef: MatDialogRef<GroupingModalComponent>) {
method closeDialog (line 15) | closeDialog() {
FILE: frontend/src/app/header/header.component.ts
class HeaderComponent (line 8) | class HeaderComponent {
method constructor (line 9) | constructor() { }
method ngOnInit (line 11) | ngOnInit() {
FILE: frontend/src/app/home/home.component.ts
class HomeComponent (line 16) | class HomeComponent {
method constructor (line 39) | constructor(private homeService: HomeService, private observer: Breakp...
method ngOnInit (line 46) | async ngOnInit() {
method groupingValAndKnownSql (line 74) | groupingValAndKnownSql() {
method changeDb (line 98) | changeDb(dbtype: any) {
method updateBackgroundStyle (line 111) | updateBackgroundStyle(data: boolean) {
method checkSideNavTAb (line 114) | checkSideNavTAb(data: any) {
method sendHistory (line 121) | sendHistory(data: any) {
method toggleMenu (line 125) | toggleMenu() {
method ngOnDestroy (line 133) | ngOnDestroy() {
FILE: frontend/src/app/http.interceptor.ts
class AppHttpInterceptor (line 7) | class AppHttpInterceptor implements HttpInterceptor {
method constructor (line 12) | constructor(public loginService: LoginService) {
method intercept (line 19) | intercept(req: HttpRequest<any>, next: HttpHandler): Observable<HttpEv...
FILE: frontend/src/app/login-button/login-button.component.ts
class LoginButtonComponent (line 11) | class LoginButtonComponent {
method constructor (line 14) | constructor(public fireservice: SharedService, public loginService: Lo...
method getLogin (line 17) | getLogin() {
method updateUserData (line 26) | updateUserData(userDetails: any): void {
FILE: frontend/src/app/login/login.component.ts
class LoginComponent (line 15) | class LoginComponent {
method constructor (line 21) | constructor(private _router: Router, public loginService: LoginService...
method ngAfterViewInit (line 32) | ngAfterViewInit() {
method navigateToUserJourney (line 39) | navigateToUserJourney() {
method showLogIn (line 50) | showLogIn(): void {
method checkboxChecked (line 58) | checkboxChecked(event: any) {
method showSnackbarCssStyles (line 66) | showSnackbarCssStyles(content: any, action: any, duration: any) {
FILE: frontend/src/app/menu/menu.component.ts
class MenuComponent (line 18) | class MenuComponent {
method constructor (line 40) | constructor(public _router: Router, public homeService: HomeService, p...
method ngOnInit (line 43) | ngOnInit() {
method ngOnChanges (line 50) | ngOnChanges(changes: SimpleChanges) {
method showMore (line 87) | showMore() {
method onMenuClick (line 90) | onMenuClick(item: 'Query' | 'New Query' | 'Reports' | 'History' | 'Ope...
method onClickHistory (line 102) | onClickHistory(chatThread: any) {
method openDialog (line 117) | openDialog() {
method uploadTemplate (line 126) | uploadTemplate() {
method onFileChange (line 137) | onFileChange(fileInput: any) {
method arrToObject (line 156) | arrToObject(arr: any[], header: any[]) {
FILE: frontend/src/app/prism/prism.component.ts
class PrismComponent (line 10) | class PrismComponent {
method constructor (line 15) | constructor() { }
method ngAfterViewInit (line 17) | ngAfterViewInit() {
method ngOnChanges (line 20) | ngOnChanges(changes: any): void {
FILE: frontend/src/app/scenario-list/scenario-list.component.ts
type Question (line 12) | interface Question {
type ScenarioNode (line 17) | interface ScenarioNode {
class ScenarioListComponent (line 29) | class ScenarioListComponent {
method constructor (line 37) | constructor(public homeService: HomeService, public chatService: ChatS...
method ngOnChanges (line 43) | ngOnChanges(changes: SimpleChanges) {
method ngOnInit (line 56) | ngOnInit() {
method formatCsvData (line 60) | formatCsvData() {
method onClickScenario (line 99) | onClickScenario(question: any, scenario: any) {
method resetSelectedScenario (line 119) | resetSelectedScenario() {
method constructNestedTree (line 122) | constructNestedTree(questions: any[]) {
FILE: frontend/src/app/shared/services/chat.service.ts
type Message (line 6) | interface Message {
type ChatSession (line 19) | interface ChatSession {
class ChatService (line 27) | class ChatService {
method constructor (line 40) | constructor(public homeService: HomeService) {
method createNewSession (line 52) | createNewSession() {
method addToSessionThread (line 64) | addToSessionThread(message: Message) {
method addQuestion (line 75) | addQuestion(question: string, userId: string, agentCase: string, selec...
method generate_sql (line 98) | generate_sql(question: string, userId: string) {
method generate_history_thread (line 115) | generate_history_thread(selectedHistory: any) {
FILE: frontend/src/app/shared/services/home.service.ts
class HomeService (line 10) | class HomeService {
method constructor (line 24) | constructor(public http: HttpClient) { }
method ngOnInit (line 26) | ngOnInit() { }
method getUserSessions (line 28) | getUserSessions(userId: string) {
method getAvailableDatabases (line 35) | getAvailableDatabases(): any {
method sqlSuggestionList (line 46) | sqlSuggestionList(grouping: any, dbtype: any) {
method generateSql (line 66) | generateSql(userQuestion: any, grouping: any, session_id: any, user_id...
method handleError (line 87) | private handleError(error: HttpErrorResponse) {
method setAvailableDBList (line 96) | setAvailableDBList(databaseList: string) {
method getAvailableDBList (line 99) | getAvailableDBList(): string {
method setSelectedDbGrouping (line 102) | setSelectedDbGrouping(selectedDBGroup: any) {
method getSelectedDbGrouping (line 105) | getSelectedDbGrouping(): string {
method setselectedDbName (line 109) | setselectedDbName(databaseList: any) {
method getselectedDbName (line 112) | getselectedDbName(): string {
method setSessionId (line 116) | setSessionId(session_id: any) {
method getSessionId (line 119) | getSessionId(): string {
method getChatMsgs (line 123) | getChatMsgs(): any[] {
method updateChatMsgs (line 127) | updateChatMsgs(chatMsgs: any) {
method getSelectedHistory (line 131) | getSelectedHistory() {
method updateSelectedHistory (line 135) | updateSelectedHistory(selectedHistory: any) {
method updateChatMsgsAtIndex (line 139) | updateChatMsgsAtIndex(chatMsg: any, ind: any) {
method runQuery (line 143) | runQuery(query: any, grouping: any, user_question: any, session_id: an...
method thumbsUp (line 162) | thumbsUp(sql: any, user_question: any, selectedGrouping: any, session_...
method generateViz (line 182) | generateViz(question: any, query: any, result: any, session_id: any) {
FILE: frontend/src/app/shared/services/login.service.ts
class LoginService (line 7) | class LoginService {
method constructor (line 9) | constructor() { }
method getLoginError (line 16) | getLoginError(): any {
method updateLoginError (line 19) | updateLoginError(msg: any) {
method getUserDetails (line 22) | getUserDetails(): Observable<any> {
method getIdToken (line 25) | getIdToken(): any {
method setIdToken (line 29) | setIdToken(token: any) {
method sendUserDetails (line 33) | sendUserDetails(message: any) {
FILE: frontend/src/app/shared/services/shared.service.ts
class SharedService (line 9) | class SharedService {
method constructor (line 13) | constructor(public loginservice: LoginService) { }
method googleSignin (line 15) | async googleSignin() {
FILE: frontend/src/app/upload-template/upload-template.component.ts
class UploadTemplateComponent (line 11) | class UploadTemplateComponent {
method constructor (line 12) | constructor(public dialogRef: MatDialogRef<UploadTemplateComponent>) {
method closeDialog (line 15) | closeDialog() {
FILE: frontend/src/app/user-journey/user-journey.component.ts
class UserJourneyComponent (line 12) | class UserJourneyComponent implements AfterViewInit {
method constructor (line 27) | constructor(public _router: Router, public loginService: LoginService,...
method onDemoVideoClick (line 36) | onDemoVideoClick(){
method ngOnInit (line 39) | ngOnInit() {
method ngAfterViewInit (line 50) | ngAfterViewInit(){
method navigateToHome (line 54) | async navigateToHome(userTitle: String) {
method ngOnDestroy (line 69) | ngOnDestroy() {
FILE: frontend/src/app/user-photo/user-photo.component.ts
class UserPhotoComponent (line 14) | class UserPhotoComponent {
method constructor (line 22) | constructor(private _router: Router, public dialog: Dialog, public log...
method showLogIn (line 47) | showLogIn(): void {
method ngOnDestroy (line 55) | ngOnDestroy() {
FILE: frontend/src/assets/constants.ts
constant ENDPOINT_OPENDATAQNA (line 9) | const ENDPOINT_OPENDATAQNA = 'https://opendataqna-kdr33rftkq-uc.a.run.app'
constant FIRESTORE_DATABASE_ID (line 11) | const FIRESTORE_DATABASE_ID = 'opendataqna-session-logs'
FILE: opendataqna.py
function generate_uuid (line 28) | def generate_uuid():
function get_all_databases (line 41) | def get_all_databases():
function get_source_type (line 85) | def get_source_type(user_grouping):
function generate_sql (line 129) | async def generate_sql(session_id,
function get_results (line 324) | def get_results(user_grouping, final_sql, invalid_response=False, EXECUT...
function get_response (line 384) | def get_response(session_id,user_question,result_df,Responder_model='gem...
function run_pipeline (line 410) | async def run_pipeline(session_id,
function get_kgq (line 498) | def get_kgq(user_grouping):
function embed_sql (line 548) | async def embed_sql(session_id,user_grouping,user_question,generate_sql):
function visualize (line 592) | def visualize(session_id,user_question,generated_sql,sql_results):
FILE: scripts/copy_select_table_column_bigquery.py
function copy_tables (line 4) | def copy_tables(project_id, source_dataset, destination_dataset, df):
function add_table_description (line 45) | def add_table_description(project_id, dataset, df):
function add_column_description (line 65) | def add_column_description(project_id, dataset, df):
FILE: scripts/save_config.py
function is_root_dir (line 6) | def is_root_dir():
function save_config (line 24) | def save_config(embedding_model,
FILE: terraform/scripts/create-and-store-embeddings.py
function create_and_store_embeddings (line 16) | async def create_and_store_embeddings():
FILE: utilities/__init__.py
function is_root_dir (line 8) | def is_root_dir():
function load_yaml (line 24) | def load_yaml(file_path: str) -> dict:
function format_prompt (line 41) | def format_prompt(context_prompt, **kwargs):
Condensed preview — 253 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,150K chars).
[
{
"path": ".gitignore",
"chars": 253,
"preview": ".venv/\n__pycache__/\nagents/__pycache__/\napplication_default_credentials.json\ndatabases/__pycache__/\nembeddings/__pycache"
},
{
"path": "CODE_OF_CONDUCT.md",
"chars": 4555,
"preview": "<!-- # Generated by synthtool. DO NOT EDIT! !-->\n# Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open "
},
{
"path": "CONTRIBUTING.md",
"chars": 1128,
"preview": "# How to contribute\n\nWe'd love to accept your patches and contributions to this project.\n\n## Before you begin\n\n### Sign "
},
{
"path": "Dockerfile",
"chars": 901,
"preview": "# Use the official lightweight Python image.\n# https://hub.docker.com/_/python\nFROM python:3.9-slim\n# Allow statements a"
},
{
"path": "LICENSE",
"chars": 11358,
"preview": "\n Apache License\n Version 2.0, January 2004\n "
},
{
"path": "MANIFEST.in",
"chars": 895,
"preview": "# -*- coding: utf-8 -*-\n#\n# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\")"
},
{
"path": "OWNERS",
"chars": 112,
"preview": "msubasioglu@google.com\nsteveswalker@google.com\nkpatlolla@google.com\nsrilakshmil@google.com\nmokshazna@google.com\n"
},
{
"path": "README.md",
"chars": 22557,
"preview": "<p align=\"center\">\n <a href=\"utilities/imgs/aaie.png\">\n <img src=\"utilities/imgs/aaie.png\" alt=\"aaie image\" wi"
},
{
"path": "SECURITY.md",
"chars": 329,
"preview": "# Security Policy\n\nTo report a security issue, please use [g.co/vulnz](https://g.co/vulnz).\n\nThe Google Security Team wi"
},
{
"path": "agents/BuildSQLAgent.py",
"chars": 6514,
"preview": "from abc import ABC\nfrom vertexai.language_models import CodeChatModel\nfrom vertexai.generative_models import Generative"
},
{
"path": "agents/DebugSQLAgent.py",
"chars": 10452,
"preview": "from abc import ABC\n\nimport vertexai\nfrom vertexai.language_models import CodeChatModel\nfrom vertexai.generative_models "
},
{
"path": "agents/DescriptionAgent.py",
"chars": 6662,
"preview": "from abc import ABC\nfrom .core import Agent \n\n\nclass DescriptionAgent(Agent, ABC):\n \"\"\"\n An agent specialized in g"
},
{
"path": "agents/EmbedderAgent.py",
"chars": 2758,
"preview": "from abc import ABC\nfrom .core import Agent \nfrom vertexai.language_models import TextEmbeddingModel\n\n\n\nclass EmbedderAg"
},
{
"path": "agents/ResponseAgent.py",
"chars": 2371,
"preview": "import json \nfrom abc import ABC\nfrom .core import Agent\nfrom utilities import PROMPTS, format_prompt \nfrom vertexai.gen"
},
{
"path": "agents/ValidateSQLAgent.py",
"chars": 2858,
"preview": "import json \nfrom abc import ABC\nfrom .core import Agent\nfrom utilities import PROMPTS, format_prompt \n\n\n\nclass Validate"
},
{
"path": "agents/VisualizeAgent.py",
"chars": 6566,
"preview": "#This agent generates google charts code for displaying charts on web application\n\n#Generates two charts with elements \""
},
{
"path": "agents/__init__.py",
"chars": 442,
"preview": "from .BuildSQLAgent import BuildSQLAgent\nfrom .ValidateSQLAgent import ValidateSQLAgent\nfrom .DebugSQLAgent import Debug"
},
{
"path": "agents/core.py",
"chars": 5048,
"preview": "\"\"\"\nProvides the base class for all Agents \n\"\"\"\n\nfrom abc import ABC\nimport vertexai\nfrom google.cloud.aiplatform import"
},
{
"path": "app.py",
"chars": 5956,
"preview": "import streamlit as st\nimport pandas as pd\nimport json\nfrom streamlit.components.v1 import html\nfrom streamlit.logger im"
},
{
"path": "backend-apis/README.md",
"chars": 13395,
"preview": "\n\n\n\n<h3 style=\"text-align:center;\"> Create Endpoints </h3>\n\n Here we are going to create publicly accessible endpoints"
},
{
"path": "backend-apis/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "backend-apis/main.py",
"chars": 14160,
"preview": "# -*- coding: utf-8 -*-\n\n\n# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\")"
},
{
"path": "backend-apis/policy.yaml",
"chars": 85,
"preview": "constraint: constraints/iam.allowedPolicyMemberDomains\nlistPolicy:\n allValues: ALLOW"
},
{
"path": "config.ini",
"chars": 521,
"preview": "[CONFIG]\nembedding_model = vertex\ndescription_model = gemini-1.5-pro\nvector_store = bigquery-vector\ndebugging = yes\nlogg"
},
{
"path": "dbconnectors/BQConnector.py",
"chars": 22033,
"preview": "\"\"\"\nBigQuery Connector Class\n\"\"\"\nfrom google.cloud import bigquery\nfrom google.cloud import bigquery_connection_v1 as bq"
},
{
"path": "dbconnectors/FirestoreConnector.py",
"chars": 2339,
"preview": "from google.cloud import firestore \nfrom google.cloud.exceptions import NotFound\nimport time\nfrom dbconnectors import DB"
},
{
"path": "dbconnectors/PgConnector.py",
"chars": 20465,
"preview": "\"\"\"\nPostgreSQL Connector Class \n\"\"\"\nimport asyncpg\nfrom google.cloud.sql.connector import Connector\nfrom sqlalchemy impo"
},
{
"path": "dbconnectors/__init__.py",
"chars": 760,
"preview": "from .core import DBConnector\nfrom .PgConnector import PgConnector, pg_specific_data_types\nfrom .BQConnector import BQCo"
},
{
"path": "dbconnectors/core.py",
"chars": 888,
"preview": "\"\"\"\nProvides the base class for all Connectors \n\"\"\"\n\n\nfrom abc import ABC\n\n\nclass DBConnector(ABC):\n \"\"\"\n The core"
},
{
"path": "docs/README.md",
"chars": 1310,
"preview": "This directory contains documentation and resources to help you understand and use the Open Data QnA library effectively"
},
{
"path": "docs/architecture.md",
"chars": 2578,
"preview": "Architecture\n-------------\n<p align=\"center\">\n <a href=\"/utilities/imgs/OpenDataQnA_architecture.png\">\n <img s"
},
{
"path": "docs/best_practices.md",
"chars": 6845,
"preview": "# Open Data QnA: Best Practices\r\n\r\n## General Usage \r\n\r\n### Select the Right Database Connector: \r\nChoose between `PgCon"
},
{
"path": "docs/changelog.md",
"chars": 5124,
"preview": "# Release Notes - Open Data QnA v2.0.0\nThis major release brings significant improvements and new features to Open Data "
},
{
"path": "docs/config_guide.md",
"chars": 1605,
"preview": "## Follow the below guide to populate your config.ini file: \n\n______________\n\n**[CONFIG]**\n\n**embedding_model = vertex**"
},
{
"path": "docs/faq.md",
"chars": 4267,
"preview": "# Open Data QnA: FAQ\r\n\r\n## Source and Vector Store Setup\r\n**Q: If new to the vector store concept, which vector store wo"
},
{
"path": "docs/repo_structure.md",
"chars": 3434,
"preview": "Repository Structure \n-------------\n\n```\n.\n├── agents\n └── __init__.py\n └── core.py\n └── BuildSQLAgent.py\n └── Debug"
},
{
"path": "embeddings/__init__.py",
"chars": 306,
"preview": "from .retrieve_embeddings import retrieve_embeddings\nfrom .store_embeddings import store_schema_embeddings\nfrom .kgq_emb"
},
{
"path": "embeddings/kgq_embeddings.py",
"chars": 9488,
"preview": "import os\nimport asyncio\nimport asyncpg\nimport pandas as pd\nimport numpy as np\nfrom pgvector.asyncpg import register_vec"
},
{
"path": "embeddings/retrieve_embeddings.py",
"chars": 7947,
"preview": "import re\nimport io\nimport sys \nimport pandas as pd\nfrom dbconnectors import pgconnector,bqconnector\nfrom agents import "
},
{
"path": "embeddings/store_embeddings.py",
"chars": 13796,
"preview": "import asyncio\nimport asyncpg\nimport pandas as pd\nimport numpy as np\nfrom pgvector.asyncpg import register_vector\nfrom g"
},
{
"path": "env_setup.py",
"chars": 22227,
"preview": "\nimport asyncio\nfrom google.cloud import bigquery\nimport google.api_core \n\nfrom embeddings import retrieve_embeddings, s"
},
{
"path": "frontend/.gitignore",
"chars": 541,
"preview": "# See http://help.github.com/ignore-files/ for more about ignoring files.\n\n# Compiled output\n/tmp\n/out-tsc\n/bazel-out\n\n#"
},
{
"path": "frontend/README.md",
"chars": 12146,
"preview": "<h3 style=\"text-align:center;\"> Deploy Frontend Demo UI </h3>\n\n**Technologies and Components**\n\n* **Framework:** Angular"
},
{
"path": "frontend/angular.json",
"chars": 3106,
"preview": "{\n \"$schema\": \"./node_modules/@angular/cli/lib/config/schema.json\",\n \"version\": 1,\n \"newProjectRoot\": \"projects\",\n \""
},
{
"path": "frontend/database.indexes.json",
"chars": 333,
"preview": "{\n \"indexes\": [\n {\n \"collectionGroup\": \"session_logs\",\n \"queryScope\": \"COLLECTION\",\n \"fields\": [\n "
},
{
"path": "frontend/database.rules.json",
"chars": 183,
"preview": "rules_version = '2';\nservice cloud.firestore {\n match /databases/{database}/documents {\n match /{document=**} {\n "
},
{
"path": "frontend/firebase_setup.json",
"chars": 411,
"preview": "{\n \"hosting\": {\n \"public\": \"/dist/frontend/browser\",\n \"ignore\": [\n \"firebase.json\",\n \"**/.*\",\n \"**"
},
{
"path": "frontend/frontend-flutter/.flutter-plugins",
"chars": 5035,
"preview": "# This is a generated file; do not edit or check into version control.\naudio_waveforms=/Users/raimeur/.pub-cache/hosted/"
},
{
"path": "frontend/frontend-flutter/.flutter-plugins-dependencies",
"chars": 14593,
"preview": "{\"info\":\"This is a generated file; do not edit or check into version control.\",\"plugins\":{\"ios\":[{\"name\":\"audio_waveform"
},
{
"path": "frontend/frontend-flutter/Open Data QnA - Working Sheet V2 - sample_questions_UI copy.csv",
"chars": 1285,
"preview": "user_grouping,scenario,question\nMovieExplorer-bigquery,Genres,What are the top 5 most common movie genres in the dataset"
},
{
"path": "frontend/frontend-flutter/Open_Data_QnA_sample_questions_v3 copy.csv",
"chars": 1722,
"preview": "user_grouping,scenario,question,main_question\nMovieExplorer-bigquery,Genres,What are the top 5 most common movie genres "
},
{
"path": "frontend/frontend-flutter/README.md",
"chars": 19131,
"preview": "# Deploy the Flutter-based Frontend demo UI\n\n<p align=\"center\">\n <a>\n <img src=\"readme_images/opendataqna_logo"
},
{
"path": "frontend/frontend-flutter/analysis_options.yaml",
"chars": 1420,
"preview": "# This file configures the analyzer, which statically analyzes Dart code to\n# check for errors, warnings, and lints.\n#\n#"
},
{
"path": "frontend/frontend-flutter/android/.gitignore",
"chars": 285,
"preview": "gradle-wrapper.jar\n/.gradle\n/captures/\n/gradlew\n/gradlew.bat\n/local.properties\nGeneratedPluginRegistrant.java\n\n# Remembe"
},
{
"path": "frontend/frontend-flutter/android/app/build.gradle",
"chars": 2026,
"preview": "plugins {\n id \"com.android.application\"\n // START: FlutterFire Configuration\n id 'com.google.gms.google-service"
},
{
"path": "frontend/frontend-flutter/android/app/google-services.json",
"chars": 657,
"preview": "{\n \"project_info\": {\n \"project_number\": \"7413174684\",\n \"project_id\": \"dic-caa-ra1\",\n \"storage_bucket\": \"dic-ca"
},
{
"path": "frontend/frontend-flutter/android/app/src/debug/AndroidManifest.xml",
"chars": 378,
"preview": "<manifest xmlns:android=\"http://schemas.android.com/apk/res/android\">\n <!-- The INTERNET permission is required for d"
},
{
"path": "frontend/frontend-flutter/android/app/src/main/AndroidManifest.xml",
"chars": 1631,
"preview": "<manifest xmlns:android=\"http://schemas.android.com/apk/res/android\">\n <application\n android:label=\"ttmd\"\n "
},
{
"path": "frontend/frontend-flutter/android/app/src/main/kotlin/com/pilotcap/ttmd/MainActivity.kt",
"chars": 122,
"preview": "package com.pilotcap.ttmd\n\nimport io.flutter.embedding.android.FlutterActivity\n\nclass MainActivity: FlutterActivity() {\n"
},
{
"path": "frontend/frontend-flutter/android/app/src/main/res/drawable/launch_background.xml",
"chars": 434,
"preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<!-- Modify this file to customize your launch splash screen -->\n<layer-list xmln"
},
{
"path": "frontend/frontend-flutter/android/app/src/main/res/drawable-v21/launch_background.xml",
"chars": 438,
"preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<!-- Modify this file to customize your launch splash screen -->\n<layer-list xmln"
},
{
"path": "frontend/frontend-flutter/android/app/src/main/res/values/styles.xml",
"chars": 996,
"preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<resources>\n <!-- Theme applied to the Android Window while the process is sta"
},
{
"path": "frontend/frontend-flutter/android/app/src/main/res/values-night/styles.xml",
"chars": 995,
"preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<resources>\n <!-- Theme applied to the Android Window while the process is sta"
},
{
"path": "frontend/frontend-flutter/android/app/src/profile/AndroidManifest.xml",
"chars": 378,
"preview": "<manifest xmlns:android=\"http://schemas.android.com/apk/res/android\">\n <!-- The INTERNET permission is required for d"
},
{
"path": "frontend/frontend-flutter/android/build.gradle",
"chars": 542,
"preview": "buildscript {\n ext.kotlin_version = '1.7.10'\n repositories {\n google()\n mavenCentral()\n }\n\n de"
},
{
"path": "frontend/frontend-flutter/android/gradle/wrapper/gradle-wrapper.properties",
"chars": 200,
"preview": "distributionBase=GRADLE_USER_HOME\ndistributionPath=wrapper/dists\nzipStoreBase=GRADLE_USER_HOME\nzipStorePath=wrapper/dist"
},
{
"path": "frontend/frontend-flutter/android/gradle.properties",
"chars": 79,
"preview": "org.gradle.jvmargs=-Xmx4G\nandroid.useAndroidX=true\nandroid.enableJetifier=true\n"
},
{
"path": "frontend/frontend-flutter/android/nl2sql_oss_android.iml",
"chars": 1600,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<module type=\"JAVA_MODULE\" version=\"4\">\n <component name=\"FacetManager\">\n <fa"
},
{
"path": "frontend/frontend-flutter/android/settings.gradle",
"chars": 967,
"preview": "pluginManagement {\n def flutterSdkPath = {\n def properties = new Properties()\n file(\"local.properties\")"
},
{
"path": "frontend/frontend-flutter/build/web/.last_build_id",
"chars": 32,
"preview": "5389598c2a36d064db55a8edf57e320f"
},
{
"path": "frontend/frontend-flutter/ios/.gitignore",
"chars": 569,
"preview": "**/dgph\n*.mode1v3\n*.mode2v3\n*.moved-aside\n*.pbxuser\n*.perspectivev3\n**/*sync/\n.sconsign.dblite\n.tags*\n**/.vagrant/\n**/De"
},
{
"path": "frontend/frontend-flutter/ios/Flutter/AppFrameworkInfo.plist",
"chars": 774,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/P"
},
{
"path": "frontend/frontend-flutter/ios/Flutter/Debug.xcconfig",
"chars": 107,
"preview": "#include? \"Pods/Target Support Files/Pods-Runner/Pods-Runner.debug.xcconfig\"\n#include \"Generated.xcconfig\"\n"
},
{
"path": "frontend/frontend-flutter/ios/Flutter/Release.xcconfig",
"chars": 109,
"preview": "#include? \"Pods/Target Support Files/Pods-Runner/Pods-Runner.release.xcconfig\"\n#include \"Generated.xcconfig\"\n"
},
{
"path": "frontend/frontend-flutter/ios/Podfile",
"chars": 1414,
"preview": "# Uncomment this line to define a global platform for your project\n# platform :ios, '12.0'\n\n# CocoaPods analytics sends "
},
{
"path": "frontend/frontend-flutter/ios/Runner/AppDelegate.swift",
"chars": 404,
"preview": "import UIKit\nimport Flutter\n\n@UIApplicationMain\n@objc class AppDelegate: FlutterAppDelegate {\n override func applicatio"
},
{
"path": "frontend/frontend-flutter/ios/Runner/Assets.xcassets/AppIcon.appiconset/Contents.json",
"chars": 2519,
"preview": "{\n \"images\" : [\n {\n \"size\" : \"20x20\",\n \"idiom\" : \"iphone\",\n \"filename\" : \"Icon-App-20x20@2x.png\",\n "
},
{
"path": "frontend/frontend-flutter/ios/Runner/Assets.xcassets/LaunchImage.imageset/Contents.json",
"chars": 391,
"preview": "{\n \"images\" : [\n {\n \"idiom\" : \"universal\",\n \"filename\" : \"LaunchImage.png\",\n \"scale\" : \"1x\"\n },\n "
},
{
"path": "frontend/frontend-flutter/ios/Runner/Assets.xcassets/LaunchImage.imageset/README.md",
"chars": 336,
"preview": "# Launch Screen Assets\n\nYou can customize the launch screen with your own desired assets by replacing the image files in"
},
{
"path": "frontend/frontend-flutter/ios/Runner/Base.lproj/LaunchScreen.storyboard",
"chars": 2377,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<document type=\"com.apple.InterfaceBuilder3.CocoaTouch.Storyboard"
},
{
"path": "frontend/frontend-flutter/ios/Runner/Base.lproj/Main.storyboard",
"chars": 1605,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<document type=\"com.apple.InterfaceBuilder3.CocoaTouch.Storyboard"
},
{
"path": "frontend/frontend-flutter/ios/Runner/GoogleService-Info.plist",
"chars": 859,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/P"
},
{
"path": "frontend/frontend-flutter/ios/Runner/Info.plist",
"chars": 1635,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/P"
},
{
"path": "frontend/frontend-flutter/ios/Runner/Runner-Bridging-Header.h",
"chars": 38,
"preview": "#import \"GeneratedPluginRegistrant.h\"\n"
},
{
"path": "frontend/frontend-flutter/ios/Runner.xcodeproj/project.pbxproj",
"chars": 30883,
"preview": "// !$*UTF8*$!\n{\n\tarchiveVersion = 1;\n\tclasses = {\n\t};\n\tobjectVersion = 54;\n\tobjects = {\n\n/* Begin PBXBuildFile section *"
},
{
"path": "frontend/frontend-flutter/ios/Runner.xcodeproj/project.xcworkspace/contents.xcworkspacedata",
"chars": 135,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<Workspace\n version = \"1.0\">\n <FileRef\n location = \"self:\">\n </FileRef"
},
{
"path": "frontend/frontend-flutter/ios/Runner.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist",
"chars": 238,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/P"
},
{
"path": "frontend/frontend-flutter/ios/Runner.xcodeproj/project.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings",
"chars": 226,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/P"
},
{
"path": "frontend/frontend-flutter/ios/Runner.xcodeproj/xcshareddata/xcschemes/Runner.xcscheme",
"chars": 3647,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<Scheme\n LastUpgradeVersion = \"1510\"\n version = \"1.3\">\n <BuildAction\n "
},
{
"path": "frontend/frontend-flutter/ios/Runner.xcworkspace/contents.xcworkspacedata",
"chars": 224,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<Workspace\n version = \"1.0\">\n <FileRef\n location = \"group:Runner.xcodepr"
},
{
"path": "frontend/frontend-flutter/ios/Runner.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist",
"chars": 238,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/P"
},
{
"path": "frontend/frontend-flutter/ios/Runner.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings",
"chars": 226,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/P"
},
{
"path": "frontend/frontend-flutter/ios/RunnerTests/RunnerTests.swift",
"chars": 285,
"preview": "import Flutter\nimport UIKit\nimport XCTest\n\nclass RunnerTests: XCTestCase {\n\n func testExample() {\n // If you add cod"
},
{
"path": "frontend/frontend-flutter/lib/firebase_options.dart",
"chars": 2526,
"preview": "// File generated by FlutterFire CLI.\n// ignore_for_file: type=lint\nimport 'package:firebase_core/firebase_core.dart' sh"
},
{
"path": "frontend/frontend-flutter/lib/main.dart",
"chars": 126855,
"preview": "import 'dart:ui' as ui;\nimport 'package:file_picker/file_picker.dart';\nimport 'package:flutter/cupertino.dart';\nimport '"
},
{
"path": "frontend/frontend-flutter/lib/screens/bot.dart",
"chars": 72354,
"preview": "import 'dart:typed_data';\nimport 'package:cloud_firestore/cloud_firestore.dart';\nimport 'package:flutter/cupertino.dart'"
},
{
"path": "frontend/frontend-flutter/lib/screens/bot_chat_view.dart",
"chars": 2633,
"preview": "import 'package:chatview/chatview.dart';\nimport 'package:flutter/material.dart';\nimport 'package:flutter/services.dart';"
},
{
"path": "frontend/frontend-flutter/lib/screens/disclaimer.dart",
"chars": 19362,
"preview": "import 'package:firebase_auth/firebase_auth.dart';\nimport 'package:firebase_core/firebase_core.dart';\nimport 'package:fl"
},
{
"path": "frontend/frontend-flutter/lib/screens/settings.dart",
"chars": 19107,
"preview": "import 'dart:convert';\nimport 'package:cloud_firestore/cloud_firestore.dart';\nimport 'package:file_picker/file_picker.da"
},
{
"path": "frontend/frontend-flutter/lib/services/display_stepper/display_stepper_cubit.dart",
"chars": 715,
"preview": "import 'package:bloc/bloc.dart';\nimport 'package:cloud_firestore/cloud_firestore.dart';\nimport 'package:ttmd/services/di"
},
{
"path": "frontend/frontend-flutter/lib/services/display_stepper/display_stepper_state.dart",
"chars": 566,
"preview": "import 'package:equatable/equatable.dart';\n\n\nenum displayStepperStatus {display_stepper,remove_stepper}\n\nclass DisplaySt"
},
{
"path": "frontend/frontend-flutter/lib/services/first_question/first_question_cubit.dart",
"chars": 560,
"preview": "import 'package:bloc/bloc.dart';\nimport 'package:ttmd/services/first_question/first_question_state.dart';\n\n\nclass FirstQ"
},
{
"path": "frontend/frontend-flutter/lib/services/first_question/first_question_state.dart",
"chars": 698,
"preview": "import 'package:equatable/equatable.dart';\n\n\nenum firstQuestionStatus {display_welcome_message,remove_welcome_message}\n\n"
},
{
"path": "frontend/frontend-flutter/lib/services/load_question/load_question_cubit.dart",
"chars": 634,
"preview": "import 'package:bloc/bloc.dart';\nimport 'package:ttmd/services/load_question/load_question_state.dart';\nimport 'package:"
},
{
"path": "frontend/frontend-flutter/lib/services/load_question/load_question_state.dart",
"chars": 769,
"preview": "import 'package:equatable/equatable.dart';\n\nenum LoadQuestionStatus {initial,loaded, error}\n\nclass LoadQuestionState ext"
},
{
"path": "frontend/frontend-flutter/lib/services/new_suggestions/new_suggestion_cubit.dart",
"chars": 12870,
"preview": "import 'dart:convert';\nimport 'dart:math';\n\nimport 'package:bloc/bloc.dart';\nimport 'package:intl/intl.dart';\nimport 'pa"
},
{
"path": "frontend/frontend-flutter/lib/services/new_suggestions/new_suggestion_state.dart",
"chars": 1224,
"preview": "import 'package:equatable/equatable.dart';\n\n\nenum NewSuggestionStateStatus {initial,loading,loaded, all_questions_loaded"
},
{
"path": "frontend/frontend-flutter/lib/services/text_to_doc_question/text_to_doc_question_cubit.dart",
"chars": 826,
"preview": "import 'package:bloc/bloc.dart';\nimport 'package:ttmd/services/text_to_doc_question/text_to_doc_question_state.dart';\n\n\n"
},
{
"path": "frontend/frontend-flutter/lib/services/text_to_doc_question/text_to_doc_question_state.dart",
"chars": 736,
"preview": "import 'package:equatable/equatable.dart';\n\nenum textToDocStatus {not_text_to_doc,text_to_doc}\n\nclass TextToDocQuestionS"
},
{
"path": "frontend/frontend-flutter/lib/services/update_expert_mode/update_expert_mode_cubit.dart",
"chars": 689,
"preview": "import 'package:bloc/bloc.dart';\nimport 'package:ttmd/services/update_expert_mode/update_expert_mode_state.dart';\n\nimpor"
},
{
"path": "frontend/frontend-flutter/lib/services/update_expert_mode/update_expert_mode_state.dart",
"chars": 585,
"preview": "import 'package:equatable/equatable.dart';\n\n\nenum updateExpertModeStatus {expert_mode_on,expert_mode_off}\n\nclass UpdateE"
},
{
"path": "frontend/frontend-flutter/lib/services/update_popular_questions/update_popular_questions_cubit.dart",
"chars": 970,
"preview": "import 'package:bloc/bloc.dart';\nimport 'package:ttmd/services/update_popular_questions/update_popular_questions_state.d"
},
{
"path": "frontend/frontend-flutter/lib/services/update_popular_questions/update_popular_questions_state.dart",
"chars": 1097,
"preview": "import 'package:equatable/equatable.dart';\n\nimport '../../utils/most_popular_questions.dart';\n\n\nenum UpdateMostPopularQu"
},
{
"path": "frontend/frontend-flutter/lib/services/update_stepper/update_stepper_cubit.dart",
"chars": 2085,
"preview": "import 'package:bloc/bloc.dart';\nimport 'package:ttmd/services/update_stepper/update_stepper_state.dart';\nimport 'packag"
},
{
"path": "frontend/frontend-flutter/lib/services/update_stepper/update_stepper_state.dart",
"chars": 1568,
"preview": "import 'package:equatable/equatable.dart';\nimport 'package:flutter/material.dart';\n\nimport '../../utils/stepper_expert_i"
},
{
"path": "frontend/frontend-flutter/lib/utils/Input_custom.dart",
"chars": 750,
"preview": "import 'package:flutter/material.dart';\nimport 'package:flutter_chat_ui/flutter_chat_ui.dart';\nimport 'package:flutter_c"
},
{
"path": "frontend/frontend-flutter/lib/utils/TextToDocParameter.dart",
"chars": 952,
"preview": "class TextToDocParameter {\n static bool isTextTodocGlobal = false;\n static bool isAuthenticated = false;\n static bool"
},
{
"path": "frontend/frontend-flutter/lib/utils/custom_input_field.dart",
"chars": 23322,
"preview": "import 'dart:convert';\n\nimport 'package:cloud_firestore/cloud_firestore.dart';\nimport 'package:flutter/foundation.dart';"
},
{
"path": "frontend/frontend-flutter/lib/utils/most_popular_questions.dart",
"chars": 230,
"preview": "class MostPopularQ {\n int count;\n String time;\n String question;\n MostPopularQ(this.question,this.count, this.time);"
},
{
"path": "frontend/frontend-flutter/lib/utils/pdf_viewer.dart",
"chars": 1471,
"preview": "import 'package:flutter/material.dart';\nimport 'package:flutter/services.dart';\n\n\n//Not a usable class to display PDF fi"
},
{
"path": "frontend/frontend-flutter/lib/utils/stepper_expert_info.dart",
"chars": 866,
"preview": "class StepperExpertInfo {\n final String? uri;\n final String? body;\n final String? header;\n final String? response;\n "
},
{
"path": "frontend/frontend-flutter/lib/utils/tabbed_container.dart",
"chars": 1340,
"preview": "\nimport 'package:flutter/material.dart';\n\nclass TabbedContainer extends StatefulWidget {\n final List<Widget> tabs;\n fi"
},
{
"path": "frontend/frontend-flutter/nl2sql_oss.iml",
"chars": 6296,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<module type=\"JAVA_MODULE\" version=\"4\">\n <component name=\"NewModuleRootManager\" "
},
{
"path": "frontend/frontend-flutter/pubspec.yaml",
"chars": 4786,
"preview": "name: ttmd\ndescription: \"A new Flutter project.\"\n# The following line prevents the package from being accidentally publi"
},
{
"path": "frontend/frontend-flutter/test/widget_test.dart",
"chars": 1055,
"preview": "// This is a basic Flutter widget test.\n//\n// To perform an interaction with a widget in your test, use the WidgetTester"
},
{
"path": "frontend/frontend-flutter/web/index 01.49.28.html",
"chars": 3250,
"preview": "<!DOCTYPE html>\n<html>\n<head>\n <!--\n If you are serving your web app in a path other than the root, change the\n h"
},
{
"path": "frontend/frontend-flutter/web/index.html",
"chars": 2000,
"preview": "<!DOCTYPE html>\n<html>\n<head>\n <!--\n If you are serving your web app in a path other than the root, change the\n h"
},
{
"path": "frontend/frontend-flutter/web/manifest.json",
"chars": 928,
"preview": "{\n \"name\": \"frontend_flutter\",\n \"short_name\": \"frontend_flutter\",\n \"start_url\": \".\",\n \"display\": \"standalone"
},
{
"path": "frontend/frontend.yaml",
"chars": 855,
"preview": "steps:\n - name: 'node:21'\n args:\n - install\n - '--legacy-peer-deps'\n dir: frontend\n entrypoint: npm\n"
},
{
"path": "frontend/package.json",
"chars": 1598,
"preview": "{\n \"name\": \"frontend\",\n \"version\": \"0.0.0\",\n \"scripts\": {\n \"ng\": \"ng\",\n \"start\": \"ng serve\",\n \"build\": \"ng b"
},
{
"path": "frontend/server.ts",
"chars": 1703,
"preview": "import { APP_BASE_HREF } from '@angular/common';\nimport { CommonEngine } from '@angular/ssr';\nimport express from 'expre"
},
{
"path": "frontend/src/app/agent-chat/agent-chat.component.html",
"chars": 10061,
"preview": "<ng-container *ngIf=\"msg\">\n <mat-stepper id=\"stepper\" *ngIf=\"msg?.author=='agent'\" labelPosition=\"bottom\">\n <n"
},
{
"path": "frontend/src/app/agent-chat/agent-chat.component.scss",
"chars": 7659,
"preview": " input {\n font-family: 'Google Sans';\n font-style: normal;\n font-weight: 500;\n font-size: 14px;\n }"
},
{
"path": "frontend/src/app/agent-chat/agent-chat.component.spec.ts",
"chars": 618,
"preview": "import { ComponentFixture, TestBed } from '@angular/core/testing';\n\nimport { AgentChatComponent } from './agent-chat.com"
},
{
"path": "frontend/src/app/agent-chat/agent-chat.component.ts",
"chars": 7657,
"preview": "import { AfterViewInit, ChangeDetectorRef, Component, ElementRef, Input, ViewChild, signal } from '@angular/core';\nimpor"
},
{
"path": "frontend/src/app/app-routing.module.ts",
"chars": 735,
"preview": "import { NgModule } from '@angular/core';\nimport { RouterModule, Routes } from '@angular/router';\nimport { LoginComponen"
},
{
"path": "frontend/src/app/app.component.html",
"chars": 32,
"preview": "<router-outlet></router-outlet>\n"
},
{
"path": "frontend/src/app/app.component.scss",
"chars": 0,
"preview": ""
},
{
"path": "frontend/src/app/app.component.spec.ts",
"chars": 943,
"preview": "import { TestBed } from '@angular/core/testing';\nimport { AppComponent } from './app.component';\n\ndescribe('AppComponent"
},
{
"path": "frontend/src/app/app.component.ts",
"chars": 275,
"preview": "import { Component } from '@angular/core';\n\n@Component({\n selector: 'app-root',\n templateUrl: './app.component.html',\n"
},
{
"path": "frontend/src/app/app.module.server.ts",
"chars": 317,
"preview": "import { NgModule } from '@angular/core';\nimport { ServerModule } from '@angular/platform-server';\nimport { AppComponent"
},
{
"path": "frontend/src/app/app.module.ts",
"chars": 5704,
"preview": "import { CUSTOM_ELEMENTS_SCHEMA, NgModule, importProvidersFrom } from \"@angular/core\";\nimport { AppComponent } from \"./a"
},
{
"path": "frontend/src/app/business-user/business-user.component.html",
"chars": 4124,
"preview": "<div class=\"container-fluid\">\n <div *ngIf=\"isSuggestions\" class=\"summarize-results\">\n <div class=\"insight-results-er"
},
{
"path": "frontend/src/app/business-user/business-user.component.scss",
"chars": 12179,
"preview": "input {\n font-family: 'Google Sans';\n font-style: normal;\n font-weight: 500;\n font-size: 14px;\n}\n\n.name {\n border-r"
},
{
"path": "frontend/src/app/business-user/business-user.component.spec.ts",
"chars": 635,
"preview": "import { ComponentFixture, TestBed } from '@angular/core/testing';\n\nimport { BusinessUserComponent } from './business-us"
},
{
"path": "frontend/src/app/business-user/business-user.component.ts",
"chars": 5451,
"preview": "import { Component, ChangeDetectorRef, Output, EventEmitter, Input, SimpleChanges, inject } from '@angular/core';\nimport"
},
{
"path": "frontend/src/app/grouping-modal/grouping-modal.component.html",
"chars": 209,
"preview": "<div class=\"popup\">\n <div>\n <span class=\"grouping-msg\">Please select grouping before proceeding any query.</sp"
},
{
"path": "frontend/src/app/grouping-modal/grouping-modal.component.scss",
"chars": 440,
"preview": ".grouping-msg {\n font-family: \"Google Sans\";\n font-size: \"16px\";\n color : #d93035\n}\n\n.closeBtn {\n font-famil"
},
{
"path": "frontend/src/app/grouping-modal/grouping-modal.component.spec.ts",
"chars": 646,
"preview": "import { ComponentFixture, TestBed } from '@angular/core/testing';\n\nimport { GroupingModalComponent } from './grouping-m"
},
{
"path": "frontend/src/app/grouping-modal/grouping-modal.component.ts",
"chars": 449,
"preview": "import { Component } from '@angular/core';\nimport { MatDialogRef } from '@angular/material/dialog';\n\n@Component({\n sele"
},
{
"path": "frontend/src/app/header/header.component.html",
"chars": 42,
"preview": "<span class=\"title\">Open Data QnA</span>\n\n"
},
{
"path": "frontend/src/app/header/header.component.scss",
"chars": 150,
"preview": ".title {\n color: #000;\n font-family: Google Sans;\n font-size: 20px;\n font-style: normal;\n font-weight: 40"
},
{
"path": "frontend/src/app/header/header.component.spec.ts",
"chars": 596,
"preview": "import { ComponentFixture, TestBed } from '@angular/core/testing';\n\nimport { HeaderComponent } from './header.component'"
},
{
"path": "frontend/src/app/header/header.component.ts",
"chars": 240,
"preview": "import { Component } from '@angular/core';\n\n@Component({\n selector: 'app-header',\n templateUrl: './header.component.ht"
},
{
"path": "frontend/src/app/home/home.component.html",
"chars": 2508,
"preview": "<div class=\"grid-container\">\n <mat-toolbar class=\"toolbar\">\n <button *ngIf=\"isMobile\" mat-icon-button aria-label=\"Me"
},
{
"path": "frontend/src/app/home/home.component.scss",
"chars": 2462,
"preview": "h1 {\n padding: 0 1rem;\n}\n\nh2 {\n padding: 1rem;\n}\n\nmat-toolbar {\n position: fixed;\n top: 0;\n z-index: 2;\n}\n\n// Move "
},
{
"path": "frontend/src/app/home/home.component.spec.ts",
"chars": 582,
"preview": "import { ComponentFixture, TestBed } from '@angular/core/testing';\n\nimport { HomeComponent } from './home.component';\n\nd"
},
{
"path": "frontend/src/app/home/home.component.ts",
"chars": 4278,
"preview": "import { Component, ViewChild } from '@angular/core';\nimport { FormControl } from '@angular/forms';\nimport { HomeService"
},
{
"path": "frontend/src/app/http.interceptor.ts",
"chars": 1536,
"preview": "import { Injectable } from \"@angular/core\";\nimport { HttpEvent, HttpHandler, HttpInterceptor, HttpRequest, HttpResponse "
},
{
"path": "frontend/src/app/login/login.component.html",
"chars": 25719,
"preview": "<div class=\"p-4 login-page-background\" id=\"loginpage\">\n <div>\n <span class=\"search\">\n <!-- Google C"
},
{
"path": "frontend/src/app/login/login.component.scss",
"chars": 3489,
"preview": ".search {\n color: #FFFFFF;\n}\n\n.login-user {\n float: right;\n padding-right: 20px;\n}\n\n.login-container .mat-mdc-d"
},
{
"path": "frontend/src/app/login/login.component.spec.ts",
"chars": 589,
"preview": "import { ComponentFixture, TestBed } from '@angular/core/testing';\n\nimport { LoginComponent } from './login.component';\n"
},
{
"path": "frontend/src/app/login/login.component.ts",
"chars": 2102,
"preview": "import { Component, ElementRef } from '@angular/core';\nimport { LoginButtonComponent } from '../login-button/login-butto"
},
{
"path": "frontend/src/app/login-button/login-button.component.html",
"chars": 1589,
"preview": "<ng-container *ngIf=\"!userLoggedIn\">\n<div class=\"popup\">\n <!-- <button class=\"login-btn\" (click)=\"getLogin()\">Login</bu"
},
{
"path": "frontend/src/app/login-button/login-button.component.scss",
"chars": 4118,
"preview": ".login-btn {\n align-items: center;\n margin-left: 75px;\n margin-top: 110px;\n\n /* label-text */\n width: 90px;\n heigh"
},
{
"path": "frontend/src/app/login-button/login-button.component.spec.ts",
"chars": 632,
"preview": "import { ComponentFixture, TestBed } from '@angular/core/testing';\n\nimport { LoginButtonComponent } from './login-button"
},
{
"path": "frontend/src/app/login-button/login-button.component.ts",
"chars": 875,
"preview": "import { Dialog } from '@angular/cdk/dialog';\nimport { Component } from '@angular/core';\nimport { LoginService } from '."
},
{
"path": "frontend/src/app/menu/menu.component.html",
"chars": 2826,
"preview": "<mat-nav-list class=\"app-sidenav pr-4 \" disableRipple>\n <mat-list-item\n [ngClass]=\"(clickedMenuItem === 'Query' || c"
},
{
"path": "frontend/src/app/menu/menu.component.scss",
"chars": 2625,
"preview": "mat-icon {\n vertical-align: bottom;\n}\n\n.app-sidenav {\n color: #000;\n font-feature-settings: 'clig' off, 'liga' off;\n "
},
{
"path": "frontend/src/app/menu/menu.component.spec.ts",
"chars": 582,
"preview": "import { ComponentFixture, TestBed } from '@angular/core/testing';\n\nimport { MenuComponent } from './menu.component';\n\nd"
},
{
"path": "frontend/src/app/menu/menu.component.ts",
"chars": 6209,
"preview": "import { Component, Input, EventEmitter, Output, signal, SimpleChanges, inject, ViewChild } from '@angular/core';\nimport"
},
{
"path": "frontend/src/app/prism/prism.component.html",
"chars": 199,
"preview": "<pre *ngIf=\"language\" class=\"language-{{ language }}\" style=\"background: none; white-space: pre-line;\">\n <code #codeEle"
},
{
"path": "frontend/src/app/prism/prism.component.scss",
"chars": 256,
"preview": "mat-icon {\n vertical-align: bottom;\n\n}\n\n.app-sidenav {\n font-family: \"Google Sans\";\n font-size: 14px;\n width: 200px;"
},
{
"path": "frontend/src/app/prism/prism.component.spec.ts",
"chars": 588,
"preview": "import { ComponentFixture, TestBed } from '@angular/core/testing';\n\nimport { PrismComponent } from './prism.component';\n"
},
{
"path": "frontend/src/app/prism/prism.component.ts",
"chars": 728,
"preview": "import { Component, Input, EventEmitter, Output, ViewChild, ElementRef } from '@angular/core';\nimport * as Prism from 'p"
},
{
"path": "frontend/src/app/prism/prism.d.ts",
"chars": 25,
"preview": "declare module 'prismjs'\n"
},
{
"path": "frontend/src/app/scenario-list/scenario-list.component.html",
"chars": 1527,
"preview": "<div *ngFor=\"let sc of scenarioData\">\n {{sc.name}}\n <mat-tree [dataSource]=\"sc.dataSource\" [treeControl]=\"treeCont"
},
{
"path": "frontend/src/app/scenario-list/scenario-list.component.scss",
"chars": 873,
"preview": ".example-tree-invisible {\n display: none;\n}\n\n.example-tree ul,\n.example-tree li {\n margin-top: 0;\n margin-bottom: 0;\n"
},
{
"path": "frontend/src/app/scenario-list/scenario-list.component.spec.ts",
"chars": 639,
"preview": "import { ComponentFixture, TestBed } from '@angular/core/testing';\n\nimport { ScenarioListComponent } from './scenario-li"
},
{
"path": "frontend/src/app/scenario-list/scenario-list.component.ts",
"chars": 4149,
"preview": "import { Component, EventEmitter, Input, Output, SimpleChanges } from '@angular/core';\nimport { NestedTreeControl, TreeC"
},
{
"path": "frontend/src/app/shared/services/chat.service.spec.ts",
"chars": 347,
"preview": "import { TestBed } from '@angular/core/testing';\n\nimport { ChatService } from './chat.service';\n\ndescribe('ChatService',"
},
{
"path": "frontend/src/app/shared/services/chat.service.ts",
"chars": 4284,
"preview": "import { Injectable } from '@angular/core';\nimport { Subject, takeUntil } from 'rxjs';\nimport { BehaviorSubject } from '"
},
{
"path": "frontend/src/app/shared/services/home.service.spec.ts",
"chars": 347,
"preview": "import { TestBed } from '@angular/core/testing';\n\nimport { HomeService } from './home.service';\n\ndescribe('HomeService',"
},
{
"path": "frontend/src/app/shared/services/home.service.ts",
"chars": 5451,
"preview": "import { Injectable, inject } from '@angular/core';\nimport { HttpHeaders, HttpClient, HttpErrorResponse } from '@angular"
},
{
"path": "frontend/src/app/shared/services/login.service.spec.ts",
"chars": 352,
"preview": "import { TestBed } from '@angular/core/testing';\n\nimport { LoginService } from './login.service';\n\ndescribe('LoginServic"
},
{
"path": "frontend/src/app/shared/services/login.service.ts",
"chars": 850,
"preview": "import { Injectable } from '@angular/core';\nimport { Observable, ReplaySubject } from 'rxjs';\n\n@Injectable({\n providedI"
},
{
"path": "frontend/src/app/shared/services/shared.service.spec.ts",
"chars": 357,
"preview": "import { TestBed } from '@angular/core/testing';\n\nimport { SharedService } from './shared.service';\n\ndescribe('SharedSer"
},
{
"path": "frontend/src/app/shared/services/shared.service.ts",
"chars": 1208,
"preview": "import { Injectable, inject } from '@angular/core';\nimport { LoginService } from './login.service';\nimport { GoogleAuthP"
},
{
"path": "frontend/src/app/upload-template/upload-template.component.html",
"chars": 1702,
"preview": "<div class=\"popup\">\n <div>\n <span class=\"grouping-msg\">Please upload a csv file with the details below.</span>"
},
{
"path": "frontend/src/app/upload-template/upload-template.component.scss",
"chars": 1100,
"preview": ".grouping-msg {\n font-family: \"Google Sans\";\n font-size: 16px;\n color: #d93035;\n margin-bottom: 20px;\n}\n\n.closeBtn {"
},
{
"path": "frontend/src/app/upload-template/upload-template.component.spec.ts",
"chars": 653,
"preview": "import { ComponentFixture, TestBed } from '@angular/core/testing';\n\nimport { UploadTemplateComponent } from './upload-te"
},
{
"path": "frontend/src/app/upload-template/upload-template.component.ts",
"chars": 458,
"preview": "import { Component } from '@angular/core';\nimport { MatDialogRef } from '@angular/material/dialog';\n\n@Component({\n sele"
},
{
"path": "frontend/src/app/user-journey/user-journey.component.html",
"chars": 25302,
"preview": "<div class=\"d-flex flex-column p-4 gap-4 list-css\">\n <div>\n <span class=\"search\">\n <svg width=\"416\""
},
{
"path": "frontend/src/app/user-journey/user-journey.component.scss",
"chars": 2579,
"preview": ".list-css {\n background-image: radial-gradient(grey, black);\n min-width: 100%;\n min-height: 100%;\n}\n\n.search {\n colo"
},
{
"path": "frontend/src/app/user-journey/user-journey.component.spec.ts",
"chars": 632,
"preview": "import { ComponentFixture, TestBed } from '@angular/core/testing';\n\nimport { UserJourneyComponent } from './user-journey"
},
{
"path": "frontend/src/app/user-journey/user-journey.component.ts",
"chars": 2084,
"preview": "import { AfterViewInit, Component } from '@angular/core';\nimport { LoginService } from '../shared/services/login.service"
},
{
"path": "frontend/src/app/user-photo/user-photo.component.html",
"chars": 153,
"preview": "<span class=\"login-user\" *ngIf=\"userLoggedIn\"> <img src=\"{{photoURL}}\" class=\"rounded-circle\"\n alt=\"User Profile Image\""
},
{
"path": "frontend/src/app/user-photo/user-photo.component.scss",
"chars": 56,
"preview": "\n.login-user {\n float: right;\n padding-right: 20px;\n}\n"
},
{
"path": "frontend/src/app/user-photo/user-photo.component.spec.ts",
"chars": 618,
"preview": "import { ComponentFixture, TestBed } from '@angular/core/testing';\n\nimport { UserPhotoComponent } from './user-photo.com"
},
{
"path": "frontend/src/app/user-photo/user-photo.component.ts",
"chars": 1814,
"preview": "import { Component, inject } from '@angular/core';\nimport { LoginButtonComponent } from '../login-button/login-button.co"
}
]
// ... and 53 more files (download for full content)
About this extraction
This page contains the full source code of the GoogleCloudPlatform/Open_Data_QnA GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 253 files (1.0 MB), approximately 271.4k tokens, and a symbol index with 457 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub-repo-to-text converter for AI. Built by Nikandr Surkov.