Repository: premAI-io/premsql Branch: main Commit: 7041239e5ce1 Files: 90 Total size: 593.0 KB Directory structure: gitextract_acehzbqp/ ├── .gitignore ├── README.md ├── examples/ │ ├── agent_server.ipynb │ ├── agents.ipynb │ ├── datasets.ipynb │ ├── error_dataset.ipynb │ ├── evaluation.ipynb │ ├── finetuning.ipynb │ ├── generators.ipynb │ ├── lora_tuning.py │ └── simple_pipeline.ipynb ├── premsql/ │ ├── __init__.py │ ├── agents/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── baseline/ │ │ │ ├── __init__.py │ │ │ ├── main.py │ │ │ ├── prompts.py │ │ │ └── workers/ │ │ │ ├── __init__.py │ │ │ ├── analyser.py │ │ │ ├── followup.py │ │ │ ├── plotter.py │ │ │ └── text2sql.py │ │ ├── memory.py │ │ ├── models.py │ │ ├── router.py │ │ ├── tools/ │ │ │ ├── __init__.py │ │ │ └── plot/ │ │ │ ├── base.py │ │ │ └── matplotlib_tool.py │ │ └── utils.py │ ├── cli.py │ ├── datasets/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── collator.py │ │ ├── error_dataset.py │ │ ├── real/ │ │ │ ├── bird.py │ │ │ ├── domains.py │ │ │ └── spider.py │ │ └── synthetic/ │ │ └── gretel.py │ ├── evaluator/ │ │ ├── README.md │ │ ├── __init__.py │ │ └── base.py │ ├── executors/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── from_langchain.py │ │ └── from_sqlite.py │ ├── generators/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── huggingface.py │ │ ├── mlx.py │ │ ├── ollama_model.py │ │ ├── openai.py │ │ └── premai.py │ ├── logger.py │ ├── playground/ │ │ ├── __init__.py │ │ ├── backend/ │ │ │ ├── api/ │ │ │ │ ├── __init__.py │ │ │ │ ├── admin.py │ │ │ │ ├── apps.py │ │ │ │ ├── migrations/ │ │ │ │ │ ├── 0001_initial.py │ │ │ │ │ └── __init__.py │ │ │ │ ├── models.py │ │ │ │ ├── pydantic_models.py │ │ │ │ ├── serializers.py │ │ │ │ ├── services.py │ │ │ │ ├── tests.py │ │ │ │ ├── urls.py │ │ │ │ ├── utils.py │ │ │ │ └── views.py │ │ │ ├── backend/ │ │ │ │ ├── __init__.py │ │ │ │ ├── asgi.py │ │ │ │ ├── settings.py │ │ │ │ ├── urls.py │ │ │ │ └── wsgi.py │ │ │ ├── backend_client.py │ │ │ └── manage.py │ │ ├── 
frontend/ │ │ │ ├── components/ │ │ │ │ ├── chat.py │ │ │ │ ├── session.py │ │ │ │ ├── streamlit_plot.py │ │ │ │ └── uploader.py │ │ │ ├── main.py │ │ │ └── utils.py │ │ └── inference_server/ │ │ ├── api_client.py │ │ └── service.py │ ├── prompts.py │ ├── tuner/ │ │ ├── __init__.py │ │ ├── callback.py │ │ ├── config.py │ │ ├── full.py │ │ └── peft.py │ └── utils.py └── pyproject.toml ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ data experiments output test.py exps # Python specific *.pyc *.pyo __pycache__/ # Virtual environments venv/ env/ env.bak/ env1/ env2/ .env # IDE specific .idea/ .vscode/ # Compiled files *.pyc *.pyo *.pyd *.so *.dll *.exe *.out *.pyc *.whl # Logs and databases *.log *.sqlite3 *.db # Data science and ML specific data/ models/ *.h5 *.pkl *.joblib # Jupyter Notebook specific .ipynb_checkpoints/ ================================================ FILE: README.md ================================================ # PremSQL | Easy to use fully local RAG on Databases [![PyPI Downloads](https://static.pepy.tech/badge/premsql)](https://pepy.tech/projects/premsql) PremSQL is an open-source library designed to help developers create secure, fully local Text-to-SQL solutions using small language models. It provides all the essential tools to build and deploy end-to-end Text-to-SQL pipelines with customizable components, making it ideal for secure, autonomous AI-powered data analysis. ![alt architecture](/assets/architecture.png) ## New: PremSQL Playground, Agents and API We just released the latest version of PremSQL. It comes with the following: - **PremSQL Agents:** Using PremSQL agents you can perform analysis, plot charts and query databases, all using Natural Language. For now it comes with a baseline-level agent. 
Using our library you can customize agents and build on top of it. - **PremSQL API**: A self-hosted API which can then be used from any language to make requests to the deployed agents. - **PremSQL Playground**: A playground UI (self-hosted) which you can use to interact with Text to SQL agents for your analysis tasks. You can also test your customized agents using this playground as well. Watch it in action. https://github.com/user-attachments/assets/b6db9737-cd42-4848-8a44-f23a5de1f600 ## News and blogs - [Nov 18th 2024] [Prem-1B-SQL](https://huggingface.co/premai-io/prem-1B-SQL) reached 10K + downloads in HuggingFace - [Nov 7th 2024] Release of [Prem-1B-SQL Ollama](https://ollama.com/anindya/prem1b-sql-ollama-fp116) and Ollama support. - [Nov 5th 2024] Release of PremSQL agents, AgentServer and Playground - [Oct 30th] Prem-1B-SQL crossed 5K + downloads on Huggingface - [Sep 20th 2024] First release of [Prem-1B-SQL](https://huggingface.co/premai-io/prem-1B-SQL) (51.54% on BirdBench private dataset) | [Blog post](https://blog.premai.io/prem-1b-sql-fully-local-performant-slm-for-text-to-sql/) - [Sep 10th 2024] First release of PremSQL | [Blog Post](https://blog.premai.io/premsql-towards-end-to-end-local-text-to-sql-pipelines-2/) - [Blog post]: [Using PremSQL to evaluate different open and closed source models](https://blog.premai.io/premsql-towards-end-to-end-local-text-to-sql-pipelines-2/) - [Blog post]: [State of Text to SQL 2024](https://blog.premai.io/state-of-text2sql-2024/) ## 🚀 Features - **Local-First**: Avoid third-party closed-source providers and keep your data secure. - **Multiple connectors**: Supports [PremAI](https://app.premai.io/projects/), [Ollama](https://ollama.com/), [HuggingFace](https://huggingface.co/), [Apple MLX](https://huggingface.co/), [OpenAI](https://openai.com/). - **Customizable Datasets**: Create, fine-tune, and evaluate models with built-in or custom datasets. 
- **Robust Executors and Evaluators**: Easily connect to databases and assess model performance. - **Advanced Generators**: Convert natural language prompts into executable SQL queries. - **Error Handling and Self-Correction**: Automatically correct SQL queries during inference. - **Fine-Tuning Support**: Fine-tune models with LoRA, QLoRA, or full fine-tuning strategies. - **Agents**: Use PremSQL baseline agent to perform Text to SQL, write analysis reports and plot simple charts on databases. - **Playground**: Use our playground to do the same for agents but with a better ChatGPT UI like experience dedicated for AI powered data analysis. - **Importing CSVs or Kaggle CSV dataset directly to PremSQL playground**: You can analyse any CSV dataset from kaggle directly or from any folder using PremSQL. Last but not the least, all the features are extendible for your very own customization and private data. ## 📚 Table of Contents - [PremSQL](#premsql) - [🚀 Features](#-features) - [📚 Table of Contents](#-table-of-contents) - [🛠️ Installation](#️-installation) - [🚀 Quickstart](#-quickstart) - [📦 Components Overview](#-components-overview) - [Datasets](#datasets) - [Executors](#executors) - [Evaluators](#evaluators) - [Generators](#generators) - [Error Handling](#error-handling) - [Tuner](#tuner) - [Agents](#agents) - [AgentServer and Playground](#playground) - [🤝 Contributing](#-contributing) - [🛣️ Roadmap](#️-roadmap) - [📝 License](#-license) ## 🛠️ Installation PremSQL requires Python 3.8 or higher. Install the library via pip: ```bash pip install -U premsql ``` ## 🚀 Quickstart Here’s a quick example of how to use PremSQL to generate SQL queries, plot charts and analyse dataframes all in natural language. 
You can name this file as `start_agent.py` ```python start_agent.py import os from dotenv import load_dotenv from premsql.playground import AgentServer from premsql.agents import BaseLineAgent from premsql.generators import Text2SQLGeneratorPremAI from premsql.executors import ExecutorUsingLangChain from premsql.agents.tools import SimpleMatplotlibTool load_dotenv() text2sql_model = Text2SQLGeneratorPremAI( model_name="gpt-4o", experiment_name="text2sql_model", type="test", premai_api_key=os.environ.get("PREMAI_API_KEY"), project_id=os.environ.get("PREMAI_PROJECT_ID") ) analyser_plotter_model = Text2SQLGeneratorPremAI( model_name="gpt-4o", experiment_name="text2sql_model", type="test", premai_api_key=os.environ.get("PREMAI_API_KEY"), project_id=os.environ.get("PREMAI_PROJECT_ID") ) # Enter your Database path here. Supported SQLite, Postgres, MySQL and an unique session name. db_connection_uri = "" session_name = "" agent = BaseLineAgent( session_name=session_name, db_connection_uri=db_connection_uri, specialized_model1=text2sql_model, specialized_model2=analyser_plotter_model, executor=ExecutorUsingLangChain(), auto_filter_tables=False, plot_tool=SimpleMatplotlibTool() ) # Query the database response = agent( "/query show me the phone numbers of direct charter-funded schools opened after 2000/1/1" ) # Analyze the results analysis = agent( "/analyse what patterns do you see in the data?" ) # Create a visualization plot = agent( "/plot create a bar chart showing school counts by year" ) ``` You can launch the PremSQL Playground (as shown in the above video by adding these two additional lines after instantiating Agent) ```python agent_server = AgentServer(agent=agent, port={port}) agent_server.launch() ``` And then open two terminal. 
On one side write: ```bash premsql launch all ``` and on the second side of the terminal write: ```bash python start_agent.py ``` ## 📦 Components Overview ### [Datasets](https://docs.premai.io/premsql/introduction) PremSQL provides a simple API to use various pre-processed datasets for Text-to-SQL tasks. Text-to-SQL is complex as it requires data dependencies on databases and tables. The premsql datasets help streamline this by providing easy access to datasets and enabling you to create your own datasets with private databases. Currently, the following datasets are readily available: 1. [BirdBench Dataset](https://huggingface.co/datasets/premai-io/birdbench) 2. [Spider Unified Datasets](https://huggingface.co/datasets/premai-io/spider) 3. [Domains Dataset](https://huggingface.co/datasets/premai-io/domains) 4. [Gretel AI Dataset](https://huggingface.co/datasets/gretelai/synthetic_text_to_sql) **Example usage:** ```python from premsql.datasets import Text2SQLDataset bird_dataset = Text2SQLDataset( dataset_name='bird', split="train", force_download=False, dataset_folder="/path/to/your/data" # change this to the path where you want to store the dataset ) ``` ### Generators PremSQL generators are responsible for converting natural language questions into SQL queries. Think of these as modular inference APIs specific to text-to-SQL. You can integrate various third-party APIs, models, or custom pipelines. 
**Example:** ```python from premsql.generators import Text2SQLGeneratorHF from premsql.datasets import Text2SQLDataset # Define a dataset dataset = bird_dataset = Text2SQLDataset( dataset_name='bird', split="train", force_download=False, dataset_folder="/path/to/dataset" ).setup_dataset(num_rows=10, num_fewshot=3) # Define a generator generator = Text2SQLGeneratorHF( model_or_name_or_path="premai-io/prem-1B-SQL", experiment_name="test_generators", device="cuda:0", type="test" ) # Generate on the full dataset responses = generator.generate_and_save_results( dataset=bird_dataset, temperature=0.1, max_new_tokens=256 ) print(responses) ``` Results are saved in the experiment_path as predict.json. We also support execution guided decoding. This strategy executes the generated SQL against the DB and, if it fails, uses the error message for correction, repeating until it gets a valid result or the retries run out. ![alt text](/assets/execution_guided_decoding.png) A quick glance on execution guided decoding: ```python from premsql.executors import SQLiteExecutor executor = SQLiteExecutor() response = generator.generate_and_save_results( dataset=bird_dataset, temperature=0.1, max_new_tokens=256, force=True, executor=executor, max_retries=5 # this is optional (default is already set to 5) ) ``` ### [Executors](https://docs.premai.io/premsql/executors) An executor executes the generated SQL queries against the database and fetches the results. It is a crucial component in the Text-to-SQL pipeline, as it ensures that the generated SQL queries are valid and return the expected results. PremSQL supports a native executor for SQLite databases and also supports [LangChain's SQLDatabase](https://python.langchain.com/v0.2/docs/integrations/tools/sql_database/) as an executor. 
**Example usage** ```python from premsql.executors import SQLiteExecutor # Instantiate the executor executor = SQLiteExecutor() # Set a sample dataset path db_path = "./data/db/california_schools.sqlite" sql = 'SELECT movie_title FROM movies WHERE movie_release_year = 1945 ORDER BY movie_popularity DESC LIMIT 1' # execute the SQL result = executor.execute_sql( sql=sql, dsn_or_db_path=db_path ) print(result) ``` This will show: ```python {'result': [('Brief Encounter',)], 'error': None, 'execution_time': 0.03717160224914551} ``` ### [Evaluators](https://docs.premai.io/premsql/evaluators) Executors connect to databases and execute SQL, while evaluators assess the performance of your models against predefined metrics like Execution Accuracy (EX) and Valid Efficiency Score (VES). **Example Usage:** ```python from premsql.executors import SQLiteExecutor from premsql.evaluator import Text2SQLEvaluator # Define the executor executor = SQLiteExecutor() # Define the evaluator evaluator = Text2SQLEvaluator( executor=executor, experiment_path=generator.experiment_path ) # Now evaluate the models results = evaluator.execute( metric_name="accuracy", model_responses=response, filter_by="db_id", meta_time_out=10 ) print(results) ``` Using the `filter_by` option to filter results by `db_id` allows you to see overall accuracy and its distribution across different databases. If a key like `difficulty` is available, it will show performance distribution over various difficulty levels. Filtering evaluations by available keys helps in analyzing and understanding model performance empirically. Below is a visualization of model performance across different databases based on the applied filters. ![alt text](/assets/eval_result_filtered.png) ### [Error Handling](https://docs.premai.io/premsql/error_dataset) Error-handling prompts are crucial for refining model performance, especially in complex tasks like Text-to-SQL generation. 
The prompts help the model learn how to handle errors by providing additional context and guidance based on past mistakes. By training on these prompts, the model can self-correct during inference, improving the quality of its output. **Example Error Correction Prompt:** ```plaintext {existing_prompt} # Generated SQL: {sql} ## Error Message {error_msg} Carefully review the original question and error message, then rewrite the SQL query to address the identified issues. ``` To create a self-correction / error-correction dataset: - You start with an existing training dataset - You run an evaluation on that training dataset using an un-trained model. - You gather the data and pass it to the error-handling prompt - Finally, you save the results ready to be used for fine-tuning. Here is the code to get started to make a self-correction dataset using existing datasets: ```python from premsql.datasets.error_dataset import ErrorDatasetGenerator from premsql.generators.huggingface import Text2SQLGeneratorHF from premsql.executors.from_langchain import ExecutorUsingLangChain from premsql.datasets import BirdDataset generator = Text2SQLGeneratorHF( model_or_name_or_path="premai-io/prem-1B-SQL", experiment_name="testing_error_gen", type="train", # do not type: 'test' since this will be used during training device="cuda:0" ) executor = ExecutorUsingLangChain() bird_train = BirdDataset( split="train", dataset_folder="/path/to/dataset" ).setup_dataset(num_rows=10) error_dataset_gen = ErrorDatasetGenerator(generator=generator, executor=executor) error_dataset = error_dataset_gen.generate_and_save( datasets=bird_train, force=True ) ``` ### [Tuner](https://docs.premai.io/premsql/tuner) `premsql tuner` is a module designed to fine-tune models specifically for text-to-SQL tasks. The module offers multiple ways of fine-tuning, providing flexibility based on your project's needs. ### Supported Fine-Tuning Methods 1. 
**Full Fine-Tuning**: Standard model fine-tuning with all its parameters. 2. **PEFT using LoRA**: Parameter-efficient-fine-tuning with LoRA (Low-Rank Adaptation) for faster and more efficient training. 3. **PEFT using QLoRA**: Another PEFT approach using Quantized LoRA, optimizing resource use during training. In addition to these methods, you can create custom fine-tuning pipelines using the components and tools provided by premsql. ### Agents Agents have been quite popular for a while. Simply put, we can define agents as orchestrated workflows between different LLMs/SLMs. PremSQL Agents are mainly focused on executing tasks related to Databases. Briefly, PremSQL agents can: ![](/assets/agent_flow.png) - Query (`/query`) a database from user’s natural language input. - Analyse (`/analyse`) the database output and user query and give back an answer in natural language. - Plot (`/plot`) basic charts based on user’s query. - Lastly, for anything (`/followup`) which does not fit the above three categories, it can give you a followup on what to do next. PremSQL comes with a minimal agentic implementation (more implementation variants will come in later versions), which can query a DB, provide analysis over dataframes, answer user questions and plot simple graphs. This is how you use our baseline Text to SQL agent. 
```python import os from dotenv import load_dotenv from premsql.playground import AgentServer from premsql.agents import BaseLineAgent from premsql.generators import Text2SQLGeneratorPremAI from premsql.executors import ExecutorUsingLangChain from premsql.agents.tools import SimpleMatplotlibTool load_dotenv() text2sql_model = Text2SQLGeneratorPremAI( model_name="gpt-4o", experiment_name="text2sql_model", type="test", premai_api_key=os.environ.get("PREMAI_API_KEY"), project_id=os.environ.get("PREMAI_PROJECT_ID") ) analyser_plotter_model = Text2SQLGeneratorPremAI( model_name="gpt-4o", experiment_name="text2sql_model", type="test", premai_api_key=os.environ.get("PREMAI_API_KEY"), project_id=os.environ.get("PREMAI_PROJECT_ID") ) # Enter your Database path here. Supported SQLite, Postgres, MySQL and an unique session name. db_connection_uri = "" session_name = "" agent = BaseLineAgent( session_name=session_name, db_connection_uri=db_connection_uri, specialized_model1=text2sql_model, specialized_model2=analyser_plotter_model, executor=ExecutorUsingLangChain(), auto_filter_tables=False, plot_tool=SimpleMatplotlibTool() ) # Query the database response = agent( "/query show me the phone numbers of direct charter-funded schools opened after 2000/1/1" ) # Analyze the results analysis = agent( "/analyse what patterns do you see in the data?" ) # Create a visualization plot = agent( "/plot create a bar chart showing school counts by year" ) ``` You can learn more about PremSQL agents and their design patterns in detail in [the documentation](https://docs.premai.io/premsql/introduction). ### Playground You can think of the Playground as an environment similar to the ChatGPT UI, but specialized for RAGs on databases. There are different personas of usage of the PremSQL playground. To launch the Playground you need to write in the terminal: ```bash premsql launch all ``` This will run two things: - Django Backend API server (running on port 8000) - Streamlit UI which is our Playground. 
In the above section you have seen how we have defined our agent. You can deploy this agent anywhere using the AgentServer which is a fastapi wrapper. Using this you can either deploy as many instances of the PremSQL Baseline agent or your own agent of your choice and connect it to the playground either to test it or use it for your internal database. Here is how you define your server and launch it. ```python # File name: start_agent_server.py from premsql.playground import AgentServer from premsql.agents import BaseLineAgent # Define your agent as shown above: agent = BaseLineAgent(...) agent_server = AgentServer(agent=agent, port={port}) agent_server.launch() ``` Now inside another terminal write: ```bash python start_agent_server.py ``` This can be any python file name. This will run a fastapi server. You need to copy the deployed URL and paste it inside the `Register New Session` part of the UI. Below is an overview of how the basic backend architecture looks and how the Playground communicates with the server. ![](/assets/agent_server.png) As you can see from the above architecture, you can create independent sessions using the starter script. You can do different levels of customization on this. For instance: - You can use different generators and different models - You can add your own DB executor - Last but not least, you can add a new worker or make your own agent using a combination of our pre-existing worker implementations and your own logic. So, you can add as many such agents with different customization or your own PremSQL compatible agents and test them and use them with PremSQL Playground. You can learn about more technical details in [the documentation](https://docs.premai.io/premsql/introduction). 
## 🛣️ Roadmap PremSQL is continuously evolving, with exciting features planned for future releases: - **Synthesizer Component**: A tool to generate synthetic datasets from private data, enabling fully private text-to-SQL workflows and enhancing model fine-tuning capabilities. - **Training Better Small Language Models**: Ongoing training and optimization of small language models specifically tailored to PremSQL’s unique requirements, ensuring efficient and effective performance in text-to-SQL tasks. - **Optimization of Generators and Executors**: Improvements to enhance the robustness of existing components, including parallel processing to speed up generation and execution times. - **Standard Tests and Stability Improvements**: Introduction of comprehensive tests for greater stability of the library and the planned rollout of a simple user interface to improve the overall user experience. Stay tuned for these exciting updates! We encourage you to contribute and provide feedback to help us shape the future of PremSQL. ## 📝 License PremSQL is licensed under the MIT License. See the [LICENSE](LICENSE) file for more information. 
## ☘️ Citation ``` @misc{Anindyadeep2024PremSQL, author = {Anindyadeep}, title = {PremSQL: End-to-End Local-First Text-to-SQL Pipelines}, year = {2024}, publisher = {GitHub}, journal = {GitHub repository}, howpublished = {\url{https://github.com/premAI-io/premsql}}, note = {Accessed: 2024-12-10} } ``` ================================================ FILE: examples/agent_server.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/anindya/personal/PremSQL/v2_agent/premsql\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/anindya/Library/Caches/pypoetry/virtualenvs/text2sql-jLjiS8B5-py3.11/lib/python3.11/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library.\n", " self.shell.db['dhist'] = compress_dhist(dhist)[-100:]\n" ] } ], "source": [ "cd .." 
] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import random" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[7546]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "random.sample(range(7000, 9000), k=1)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8194" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "random.choice(range(7000, 9000))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from premsql.generators import Text2SQLGeneratorOpenAI\n", "\n", "Text2SQLGeneratorOpenAI(openai_api_key=)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create a file named `serve.py` (or it could be anything) and add the following lines there:\n", "\n", "```Python\n", "from premsql.playground import AgentServer\n", "from premsql.agents import BaseLineAgent\n", "from premsql.generators import Text2SQLGeneratorMLX\n", "from premsql.executors import ExecutorUsingLangChain\n", "from premsql.agents.tools import SimpleMatplotlibTool\n", "\n", "db_connection_uri = (\n", " \"sqlite://///Users/anindya/personal/PremSQL/v2_agent/premsql/codebase_community.sqlite\"\n", ")\n", "text2sql_model = Text2SQLGeneratorMLX(\n", " model_name_or_path=\"premai-io/prem-1B-SQL\", experiment_name=\"text2sql_model\", type=\"test\"\n", ")\n", "\n", "analyser_plotter_model = Text2SQLGeneratorMLX(\n", " model_name_or_path=\"meta-llama/Llama-3.2-1B-Instruct\", experiment_name=\"analyser_model\", type=\"test\",\n", ")\n", "\n", "baseline = BaseLineAgent(\n", " session_name=\"local_db_rag\", # An unique session name must be put\n", " db_connection_uri=db_connection_uri, # DB which needs to connect for Text to SQL \n", " specialized_model1=text2sql_model, # This referes to the Text to SQL model\n", " 
specialized_model2=analyser_plotter_model, # This refers to any model other than Text to SQL\n", " executor=ExecutorUsingLangChain(), # Which DB executor to use\n", " auto_filter_tables=False, # Whether to filter tables before Text to SQL or not (uses LLM)\n", " plot_tool=SimpleMatplotlibTool() # Matplotlib Tool which will be used by plotter worker\n", ")\n", "\n", "agent_server = AgentServer(agent=baseline, port=8263)\n", "agent_server.launch()\n", "```\n", "\n", "After this just run:\n", "\n", "```bash\n", "python serve.py\n", "```\n", "\n", "You will see a FastAPI server got started at your mentioned port with the following output:\n", "\n", "```bash\n", "INFO: Started server process [78518]\n", "INFO: Waiting for application startup.\n", "2024-10-28 00:29:46,953 - [FASTAPI-INFERENCE-SERVICE] - INFO - Starting up the application\n", "INFO: Application startup complete.\n", "INFO: Uvicorn running on http://0.0.0.0:8263 (Press CTRL+C to quit)\n", "```\n", "\n", "This means that our server has started now we can query it with our Terminal using Curl or Python requests or Javascript axios. 
" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "from premsql.playground import InferenceServerAPIClient\n", "from premsql.agents.tools import SimpleMatplotlibTool" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "text2sql-jLjiS8B5-py3.11", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.10" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: examples/agents.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/anindya/personal/PremSQL/v2_agent/premsql\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/anindya/Library/Caches/pypoetry/virtualenvs/text2sql-jLjiS8B5-py3.11/lib/python3.11/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library.\n", " self.shell.db['dhist'] = compress_dhist(dhist)[-100:]\n" ] } ], "source": [ "cd .." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Agents\n", "\n", "We are all familiar about agents. Simply we can define agents as an orchestrated workflows between different LLMs. In PremSQL we are bringing the very first versions of Text2SQL agents. Agents in PremSQL are made using the available modular components like generators, executors etc. \n", "\n", "You can even extend agents with your custom logic and workflows with very less number of code using PremSQL. We will explore this in a the coming sections. 
Agents for Database specific RAGs mainly consist of the following tasks:\n", "\n", "1. Executing Queries to databases from natural language contexts (a.k.a Text to SQL).\n", "2. Analysing the table and giving out insights in natural language. \n", "3. Plotting different graphs to draw out relationship between entities from natural language questions. \n", "4. A followup which includes error handling of agents and asking followup questions from the user. \n", "\n", "Additionally we maintain a memory that keep tracks of the previous conversation which it uses as context to get the current result. So to summarise, PremSQL has four \"routes\" that it needs to define before running. Here is a schematic diagram to understand how\n", "PremSQL agents works. " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from IPython.display import HTML\n", "HTML('')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So in any typical DB based Agentic RAG workflow, the following sequence of event happens:\n", "\n", "1. user asks a query. In PremSQL if you want to ask a query for:\n", " - for Text to SQL, then use `/query`\n", " - for analysing the output dataframe then use `/analyse` \n", " - for plotting something `/plot`\n", " - anything else goes under `/followup`. If you do not provide these markers, it goes to followup\n", " route by default. We can also implement an \"LLM\" based router, but we think it is an overkill. \n", "\n", "2. Once user provides a query specificying the proper routes, it goes to the following set of \"Workers\". Workers are the specialized components whose job is to complete one specific task. So each worker has some specific set of output schema. You can learn more about different output schema [here](/premsql/agents/models.py)\n", "\n", "3. 
Once the worker processes the input, it provides some output. Then our output parser parses the output and gives back the result to the user. Additionally it updates the memory. \n", "\n", "### Building on top of Workers\n", "\n", "So the above workflow is fixed in PremSQL. However you can create your custom Text to SQL / Analyser / Plotter or Followup worker. As long as it adheres with the [output schema](/premsql/agents/models.py), it will be compatible and used with other PremSQL features like Agent Server and Playground. \n", "\n", "### Now Let's watch Agents in action" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/anindya/Library/Caches/pypoetry/virtualenvs/text2sql-jLjiS8B5-py3.11/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "# Since this demo is done on Mac, so I am using MLX. However same can be done with PremSDK, HF and OpenAI sdk. 
\n", "\n", "from premsql.agents import BaseLineAgent\n", "from premsql.generators import Text2SQLGeneratorMLX\n", "from premsql.executors import ExecutorUsingLangChain\n", "from premsql.agents.tools import SimpleMatplotlibTool" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-10-28 00:03:38,828 - [GENERATOR] - INFO - Experiment folder found in: experiments/test/text2sql_model\n", "Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 127100.12it/s]\n", "Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 76260.07it/s]\n", "2024-10-28 00:03:40,709 - [GENERATOR] - INFO - Experiment folder found in: experiments/test/analyser_model\n", "Fetching 8 files: 100%|██████████| 8/8 [00:00<00:00, 68900.27it/s]\n", "Fetching 8 files: 100%|██████████| 8/8 [00:00<00:00, 57952.39it/s]\n" ] } ], "source": [ "# Define the generator that will do the Text to SQL task\n", "\n", "text2sql_model = Text2SQLGeneratorMLX(\n", " model_name_or_path=\"premai-io/prem-1B-SQL\", experiment_name=\"text2sql_model\", type=\"test\"\n", ")\n", "\n", "analyser_plotter_model = Text2SQLGeneratorMLX(\n", " model_name_or_path=\"meta-llama/Llama-3.2-1B-Instruct\", experiment_name=\"analyser_model\", type=\"test\",\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Now define your agent\n", "db_connection_uri = (\n", " \"sqlite://///Users/anindya/personal/PremSQL/v2_agent/premsql/codebase_community.sqlite\"\n", ")\n", "\n", "baseline = BaseLineAgent(\n", " session_name=\"local_db_rag\", # An unique session name must be put\n", " db_connection_uri=db_connection_uri, # DB which needs to connect for Text to SQL \n", " specialized_model1=text2sql_model, # This referes to the Text to SQL model\n", " specialized_model2=analyser_plotter_model, # This refers to any model other than Text to SQL\n", " executor=ExecutorUsingLangChain(), # Which DB executor to use\n", " 
auto_filter_tables=False, # Whether to filter tables before Text to SQL or not (uses LLM)\n", " plot_tool=SimpleMatplotlibTool() # Matplotlib Tool which will be used by plotter worker\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-10-28 00:03:42,866 - [BASELINE-ROUTER] - INFO - Routing to: query\n", "2024-10-28 00:03:46,238 - [BASELINE-TEXT2SQL-WORKER] - INFO - Taking the following selected table in schema: ['badges', 'comments', 'posts', 'tags', 'users', 'votes']\n", "2024-10-28 00:03:49,252 - [PIPELINE-MEMORY] - INFO - Pushed to the database\n" ] } ], "source": [ "output = baseline(\n", " question=\"/query what all tables are present in the database\"\n", ")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
typenametbl_namerootpagesql
0tablebadgesbadges4CREATE TABLE badges\\n(\\n Id INTEGER ...
1tablecommentscomments5645CREATE TABLE comments\\n(\\n Id ...
2tablepostHistorypostHistory5646CREATE TABLE postHistory\\n(\\n Id ...
3indexsqlite_autoindex_postHistory_1postHistory5647None
4tablepostLinkspostLinks5648CREATE TABLE postLinks\\n(\\n Id I...
5tablepostsposts5649CREATE TABLE posts\\n(\\n Id ...
6indexsqlite_autoindex_posts_1posts5650None
7tabletagstags5651CREATE TABLE tags\\n(\\n Id INTEGE...
8tableusersusers5652CREATE TABLE users\\n(\\n Id INT...
9indexsqlite_autoindex_users_1users5653None
10tablevotesvotes5656CREATE TABLE votes\\n(\\n Id INTEGE...
\n", "
" ], "text/plain": [ " type name tbl_name rootpage \\\n", "0 table badges badges 4 \n", "1 table comments comments 5645 \n", "2 table postHistory postHistory 5646 \n", "3 index sqlite_autoindex_postHistory_1 postHistory 5647 \n", "4 table postLinks postLinks 5648 \n", "5 table posts posts 5649 \n", "6 index sqlite_autoindex_posts_1 posts 5650 \n", "7 table tags tags 5651 \n", "8 table users users 5652 \n", "9 index sqlite_autoindex_users_1 users 5653 \n", "10 table votes votes 5656 \n", "\n", " sql \n", "0 CREATE TABLE badges\\n(\\n Id INTEGER ... \n", "1 CREATE TABLE comments\\n(\\n Id ... \n", "2 CREATE TABLE postHistory\\n(\\n Id ... \n", "3 None \n", "4 CREATE TABLE postLinks\\n(\\n Id I... \n", "5 CREATE TABLE posts\\n(\\n Id ... \n", "6 None \n", "7 CREATE TABLE tags\\n(\\n Id INTEGE... \n", "8 CREATE TABLE users\\n(\\n Id INT... \n", "9 None \n", "10 CREATE TABLE votes\\n(\\n Id INTEGE... " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "output.show_output_dataframe()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-10-28 00:04:50,910 - [BASELINE-ROUTER] - INFO - Routing to: analyse\n", "2024-10-28 00:04:56,051 - [PIPELINE-MEMORY] - INFO - Pushed to the database\n" ] } ], "source": [ "analysis = baseline(\n", " question=\"/analyse Which tables I should use for understand relation about user votes\"\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Use the votes table to understand the relation about user votes.'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "analysis.analysis" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-10-28 00:05:45,790 - [BASELINE-ROUTER] - INFO - Routing to: query\n", "2024-10-28 00:05:48,085 - 
[BASELINE-TEXT2SQL-WORKER] - INFO - Taking the following selected table in schema: ['votes']\n", "2024-10-28 00:05:48,704 - [PIPELINE-MEMORY] - INFO - Pushed to the database\n" ] } ], "source": [ "output = baseline(\n", " question=\"/query show me the first 10 rows in votes\"\n", ")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IdPostIdVoteTypeIdCreationDateUserIdBountyAmount
01322010-07-19NoneNone
12222010-07-19NoneNone
23522010-07-19NoneNone
34522010-07-19NoneNone
45322010-07-19NoneNone
56422010-07-19NoneNone
67222010-07-19NoneNone
710322010-07-19NoneNone
811522010-07-19NoneNone
912622010-07-19NoneNone
\n", "
" ], "text/plain": [ " Id PostId VoteTypeId CreationDate UserId BountyAmount\n", "0 1 3 2 2010-07-19 None None\n", "1 2 2 2 2010-07-19 None None\n", "2 3 5 2 2010-07-19 None None\n", "3 4 5 2 2010-07-19 None None\n", "4 5 3 2 2010-07-19 None None\n", "5 6 4 2 2010-07-19 None None\n", "6 7 2 2 2010-07-19 None None\n", "7 10 3 2 2010-07-19 None None\n", "8 11 5 2 2010-07-19 None None\n", "9 12 6 2 2010-07-19 None None" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "output.show_output_dataframe()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'SELECT * FROM votes LIMIT 10;'" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# You can also see what was the SQL used\n", "output.sql_string" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-10-28 00:07:39,085 - [BASELINE-ROUTER] - INFO - Routing to: query\n", "2024-10-28 00:07:42,631 - [BASELINE-TEXT2SQL-WORKER] - INFO - Error while selecting table: 'include'\n", "2024-10-28 00:07:42,632 - [BASELINE-TEXT2SQL-WORKER] - INFO - Taking the following selected table in schema: ['votes']\n", "2024-10-28 00:07:43,507 - [PIPELINE-MEMORY] - INFO - Pushed to the database\n" ] } ], "source": [ "output = baseline(\"/query what is the max and min value of creation date in votes\")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
max(CreationDate)min(CreationDate)
02011-05-012010-07-19
\n", "
" ], "text/plain": [ " max(CreationDate) min(CreationDate)\n", "0 2011-05-01 2010-07-19" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "output.show_output_dataframe()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-10-28 00:08:46,680 - [BASELINE-ROUTER] - INFO - Routing to: query\n", "2024-10-28 00:08:48,365 - [BASELINE-TEXT2SQL-WORKER] - INFO - Taking the following selected table in schema: ['votes']\n", "2024-10-28 00:08:49,891 - [PIPELINE-UTILS] - INFO - Truncating output table to first 200 rows only\n", "2024-10-28 00:08:49,893 - [PIPELINE-MEMORY] - INFO - Pushed to the database\n" ] } ], "source": [ "output = baseline(\"/query show me all the rows in votes where creation date was in the month of march 2011\")" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IdPostIdVoteTypeIdCreationDateUserIdBountyAmount
033262767222011-03-01NaNNaN
133263764822011-03-01NaNNaN
233264772122011-03-01NaNNaN
333265767422011-03-01NaNNaN
433266768722011-03-01NaNNaN
.....................
19533483116422011-03-02NaNNaN
19633484116452011-03-021720.0NaN
19733485559122011-03-02NaNNaN
19833486559152011-03-021720.0NaN
19933487776422011-03-02NaNNaN
\n", "

200 rows × 6 columns

\n", "
" ], "text/plain": [ " Id PostId VoteTypeId CreationDate UserId BountyAmount\n", "0 33262 7672 2 2011-03-01 NaN NaN\n", "1 33263 7648 2 2011-03-01 NaN NaN\n", "2 33264 7721 2 2011-03-01 NaN NaN\n", "3 33265 7674 2 2011-03-01 NaN NaN\n", "4 33266 7687 2 2011-03-01 NaN NaN\n", ".. ... ... ... ... ... ...\n", "195 33483 1164 2 2011-03-02 NaN NaN\n", "196 33484 1164 5 2011-03-02 1720.0 NaN\n", "197 33485 5591 2 2011-03-02 NaN NaN\n", "198 33486 5591 5 2011-03-02 1720.0 NaN\n", "199 33487 7764 2 2011-03-02 NaN NaN\n", "\n", "[200 rows x 6 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "output.show_output_dataframe()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-10-28 00:10:06,403 - [BASELINE-ROUTER] - INFO - Routing to: plot\n", "2024-10-28 00:10:06,407 - [PLOT-WORKER] - INFO - Going for generation\n", "2024-10-28 00:10:07,197 - [PLOT-WORKER] - INFO - Plot config: {'x': 'VoteTypeId', 'y': 'CreationDate', 'plot_type': 'scatter'}\n", "2024-10-28 00:10:07,274 - [PLOT-WORKER] - INFO - Done base64 conversion\n", "2024-10-28 00:10:07,276 - [PIPELINE-MEMORY] - INFO - Pushed to the database\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA90AAAJOCAYAAACqS2TfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABTDElEQVR4nO3dd3RU1d7G8WfSQ0ICAQIEIQnNEAiCmCtFmnQQBESKSrVy6SoXUVqQot6roNJsF0QFBAWUJtJFmgiCdJCq9GZCaIHMef/gZl6GFJKQzSTh+1lrluScPef8Zs4k5sneZ2+bZVmWAAAAAABAlnNzdQEAAAAAAORWhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAABScejQIdlsNk2ZMsXVpSCLhYWFqUuXLq4uI8dZuXKlbDabVq5c6epSACDHIHQDwD1o27ZtatOmjUJDQ+Xj46NixYqpQYMG+vDDD42dc9q0aRo7dmyy7ceOHdOwYcO0ZcsWY+e+VVJwSHp4enqqZMmS6tSpkw4cOJAl51i7dq2GDRumv//+O1PP37x5s2w2mwYNGpRqm3379slms+nll19O93FTuw7pMWzYMKf3LbVHnTp1MnX8O9WlSxf5+/u75Nx3Ii4uTjExMXrggQfk7+8vX19fVahQQQMGDNCxY8dcUtOECRNc+semmz9PHh4eCgoKUpUqVdSnTx/t3Lkz08e9dOmShg0bxh8NANxVHq4uAABwd61du1Z169ZViRIl9Pzzz6tIkSL6888/tX79er3//vvq1auXkfNOmzZN27dvV9++fZ22Hzt2TDExMQoLC1OlSpWMnDs1vXv3VnR0tK5du6bNmzfr448/1oIFC7Rt2zaFhITc0bHXrl2rmJgYdenSRfny5cvw8x988EFFRERo+vTpGjFiRIptpk2bJkl65pln0n3c1K5DerRu3VqlS5d2fB0fH6/u3burVatWat26tWN74cKFM3zse9WBAwdUv359HTlyRE8++aReeOEFeXl56ffff9dnn32mOXPmaO/evXe9rgkTJqhgwYLJRgPUqlVLly9flpeXl/EaGjRooE6dOsmyLMXGxmrr1q36/PPPNWHCBL399tsZ+mNTkkuXLikmJkaSXPbHIQD3HkI3ANxjRo4cqcDAQG3cuDFZGDx16pRrijLg4sWL8vPzS7NNzZo11aZNG0lS165dVbZsWfXu3Vuff/65Bg4ceDfKTNPTTz+twYMHa/369apatWqy/dOnT1dERIQefPDBu1JPxYoVVbFiRcfXZ86cUffu3VWxYsUMBX/ccP36dbVu3VonT57UypUr9cgjjzjtHzlypN5+++00j3Hp0iXlyZPHZJlO3Nzc5OPjc1fOVbZs2WSfq7feekvNmzfXK6+8ooiICDVt2vSu1AIAd4Lh5QBwj9m/f7/Kly+fYu9rcHBwsm1ffvml/vGPfyhPnjzKnz+/atWqpR9//NGx/7vvvlOzZs0UEhIib29vlSpVSm+++aYSExMdberUqaMFCxbo8OHDjiGjYWFhWrlypaKjoyXdCL1J+24e1rphwwY1btxYgYGBypMnj2rXrq01a9Y41Zg07Hnnzp166qmnlD9//mQBJj0effRRSdLBgwfTbLd8+XLVrFlTfn5+ypcvnx5//HHt2rXLqZ7+/ftLksLDwx2v69ChQ5JuhNXdu3fr0qVLaZ7n6aeflvT/Pdo327Rpk/bs2eNoI93onSxfvry8vb0VEhKiHj16OA1vT+06JLl69aqGDh2q0qVLy9vbW8WLF9e//vUvXb16Nc06kxw4cEA2m01jxoxJtm/t2rWy2WyaPn264z2y2WzavXu32rZtq4CAABUoUEB9+vTRlStXkj3/yy+/VJUqVeTr66ugoCC1b99ef/75521rsixLI0aM0H333ac8efKobt262rFjx22fd+3aNQUFBalr167
J9sXFxcnHx0evvvqqY9uHH36o8uXLO75PHnrooRSv282+/fZbbd26VW+88UaKn9eAgACNHDnS8XWdOnVUoUIFbdq0SbVq1VKePHn0+uuvS0r/tZs8ebIeffRRBQcHy9vbW5GRkZo4caJTm7CwMO3YsUOrVq1KdstAavd0z5o1y3F9ChYsqGeeeUZHjx51apM0/P/o0aNq2bKl/P39VahQIb366qtOPy/SUqBAAc2YMUMeHh5O701CQoKGDBmiKlWqKDAwUH5+fqpZs6ZWrFjhaHPo0CEVKlRIkhQTE+N4bcOGDXO02b17t9q0aaOgoCD5+PjooYce0vfff5+u2gAgNfR0A8A9JjQ0VOvWrdP27dtVoUKFNNvGxMRo2LBhql69uoYPHy4vLy9t2LBBy5cvV8OGDSVJU6ZMkb+/v15++WX5+/tr+fLlGjJkiOLi4vTvf/9bkvTGG28oNjZWf/31lyOQ+fv7q1y5cho+fLiGDBmiF154QTVr1pQkVa9eXdKNcNukSRNVqVJFQ4cOlZubmyM0rF69Wv/4xz+c6n3yySdVpkwZjRo1SpZlZfi92b9/v6Qbv9inZunSpWrSpIlKliypYcOG6fLly/rwww9Vo0YNbd68WWFhYWrdurX27t2r6dOna8yYMSpYsKAkOX7hHzdunGJiYrRixYo0h7iGh4erevXqmjlzpsaMGSN3d3fHvqRA99RTT0m6EWJjYmJUv359de/eXXv27NHEiRO1ceNGrVmzRp6enqleB0my2+1q0aKFfv75Z73wwgsqV66ctm3bpjFjxmjv3r2aO3fubd+/kiVLqkaNGvrqq6/Ur18/p31fffWV8ubNq8cff9xpe9u2bRUWFqbRo0dr/fr1+uCDD3T+/HlNnTrV0WbkyJEaPHiw2rZtq+eee06nT5/Whx9+qFq1aum3335Lc/j+kCFDNGLECDVt2lRNmzbV5s2b1bBhQyUkJKT5Wjw9PdWqVSvNnj1bH330kdNw6rlz5+rq1atq3769JOmTTz5R79691aZNG8cfDX7//Xdt2LDBcX1SkhTmOnbsmGYtNzt79qyaNGmi9u3b65lnnlHhwoUzdO0mTpyo8uXLq0WLFvLw8NC8efP0z3/+U3a7XT169JAkjR07Vr169ZK/v7/eeOMNSWnfMjBlyhR17dpV0dHRGj16tE6ePKn3339fa9asSXZ9EhMT1ahRIz388MP6z3/+o6VLl+rdd99VqVKl1L1793S9ByVKlFDt2rW1YsUKxcXFKSAgQHFxcfr000/VoUMHPf/887pw4YI+++wzNWrUSL/88osqVaqkQoUKaeLEicluiUgavbFjxw7VqFFDxYoV02uvvSY/Pz/NnDlTLVu21LfffqtWrVql+zoBgBMLAHBP+fHHHy13d3fL3d3dqlatmvWvf/3LWrx4sZWQkODUbt++fZabm5vVqlUrKzEx0Wmf3W53/PvSpUvJzvHiiy9aefLksa5cueLY1qxZMys0NDRZ240bN1qSrMmTJyc7R5kyZaxGjRolO194eLjVoEEDx7ahQ4dakqwOHTqk6z1YsWKFJcn673//a50+fdo6duyYtWDBAissLMyy2WzWxo0bLcuyrIMHDyarrVKlSlZwcLB19uxZx7atW7dabm5uVqdOnRzb/v3vf1uSrIMHDyY7f1K9K1asuG2t48ePtyRZixcvdmxLTEy0ihUrZlWrVs2yLMs6deqU5eXlZTVs2NDpWo0bN87xOpOkdh2++OILy83NzVq9erXT9kmTJlmSrDVr1iR7zunTpy1J1tChQx3bPvroI0uStWvXLse2hIQEq2DBglbnzp2TvQctWrRwOuY///lPS5K1detWy7Is69ChQ5a7u7s1cuRIp3bbtm2zPDw8nLZ37tzZ8vPzc3yd9L40a9bM6TP0+uuvW5Kc6knJ4sWLLUnWvHnznLY3bdrUKlmypOPrxx9/3Cpfvnyax0pJ5cqVrcDAwHS3r127tiXJmjRpktP2jFy7lL5
fGzVq5PR6LMuyypcvb9WuXTtZ26TvnaTPbkJCghUcHGxVqFDBunz5sqPd/PnzLUnWkCFDHNs6d+5sSbKGDx/udMzKlStbVapUcdomyerRo0cK78INffr0cfqcXL9+3bp69apTm/Pnz1uFCxe2unXr5tiW0mc2Sb169ayoqCinn1t2u92qXr26VaZMmVRrAYDbYXg5ANxjGjRooHXr1qlFixbaunWr3nnnHTVq1EjFihVzGkY5d+5c2e12DRkyRG5uzv+7sNlsjn/7+vo6/n3hwgWdOXNGNWvW1KVLl7R79+5M17llyxbt27dPTz31lM6ePaszZ87ozJkzunjxourVq6effvpJdrvd6TkvvfRShs7RrVs3FSpUSCEhIWrWrJkuXryozz//XA899FCK7Y8fP64tW7aoS5cuCgoKcmyvWLGiGjRooIULF6brvMOGDZNlWemayKldu3by9PR0Gqq8atUqHT161DG0fOnSpUpISFDfvn2drtXzzz+vgIAALViw4LbnmTVrlsqVK6eIiAjHe33mzBnHkPubh+mmpW3btvLx8dFXX33l2LZ48WKdOXMmxfu+k3pXkyRN5Jf0Xs6ePVt2u11t27Z1qqtIkSIqU6ZMmnUlvS+9evVy+symdxK5Rx99VAULFtTXX3/t2Hb+/HktWbJE7dq1c2zLly+f/vrrL23cuDFdx00SFxenvHnzZug53t7eyYa8Z+Ta3fz9GhsbqzNnzqh27do6cOCAYmNjM1SLJP366686deqU/vnPfzrd692sWTNFRESk+Nm79fu0Zs2aGV41IGmExoULFyRJ7u7ujtEIdrtd586d0/Xr1/XQQw9p8+bNtz3euXPntHz5crVt29bxc+zMmTM6e/asGjVqpH379iUbLg8A6cXwcgC4B0VHR2v27NlKSEjQ1q1bNWfOHI0ZM0Zt2rTRli1bFBkZqf3798vNzU2RkZFpHmvHjh0aNGiQli9frri4OKd9mfklPsm+ffskSZ07d061TWxsrPLnz+/4Ojw8PEPnGDJkiGrWrCl3d3cVLFhQ5cqVk4dH6v9rPHz4sCTp/vvvT7avXLlyWrx4cbomcMuIAgUKqFGjRpozZ44mTZokHx8fTZs2TR4eHmrbtm2adXl5ealkyZKO/WnZt2+fdu3a5RgCf6v0TrKXL18+NW/eXNOmTdObb74p6cbQ8mLFijlC4M3KlCnj9HWpUqXk5ubmuP993759siwrWbsknp6eqdaS9LpvfW6hQoWcPjep8fDw0BNPPKFp06bp6tWr8vb21uzZs3Xt2jWn0D1gwAAtXbpU//jHP1S6dGk1bNhQTz31lGrUqJHm8QMCAjIcNosVK5Zs5vCMXLs1a9Zo6NChWrduXbI5BWJjYxUYGJihetL6noiIiNDPP//stM3HxydZnfnz59f58+czdN74+HhJcvqjxeeff653331Xu3fv1rVr1xzb0/Nz4Y8//pBlWRo8eLAGDx6cYptTp06pWLFiGaoTACRCNwDc07y8vBQdHa3o6GiVLVtWXbt21axZszR06NB0Pf/vv/9W7dq1FRAQoOHDh6tUqVLy8fHR5s2bNWDAgGQ90RmR9Nx///vfqS4lduuazDf34qVHVFSU6tevn6n67qZnnnlG8+fP1/z589WiRQt9++23atiwYaohKzPsdruioqL03nvvpbi/ePHi6T5Wp06dNGvWLK1du1ZRUVH6/vvv9c9//jPZiImU3NwjnVSXzWbTokWLnO5pT2J6Xe727dvro48+0qJFi9SyZUvNnDlTEREReuCBBxxtypUrpz179mj+/Pn64Ycf9O2332rChAkaMmSIY3mqlEREROi3337Tn3/+me73N6XPeHqv3f79+1WvXj1FRETovffeU/HixeXl5aWFCxdqzJgxd/T9ml4pXcPM2L59u9zd3R2B+ssvv1SXLl3UsmVL9e/fX8HBwXJ3d9fo0aMdczWkJem1v/rqq2rUqFGKbW5eLg8AMoLQDQCQJMeQ6uP
Hj0u60eNot9u1c+fOVEPvypUrdfbsWc2ePVu1atVybE9p9u9bw9TttpcqVUrSjd7A7BKMQ0NDJUl79uxJtm/37t0qWLCgo5c7tdeVGS1atFDevHk1bdo0eXp66vz5806zlt9cV8mSJR3bExISdPDgQaf3L633e+vWrapXr94d1964cWMVKlRIX331lR5++GFdunQp1cnC9u3b59QT+ccff8hutztmVS9VqpQsy1J4eLjKli2boTqS3pd9+/Y5vS+nT59Od89qrVq1VLRoUX399dd65JFHtHz5csfkYjfz8/NTu3bt1K5dOyUkJKh169YaOXKkBg4cmOoSW82bN9f06dP15Zdf3tESdem9dvPmzdPVq1f1/fffq0SJEo7tKQ3RT+9n4ObP3q0jGfbs2ePYn5WOHDmiVatWqVq1ao6e7m+++UYlS5bU7NmznWq/9Q+Iqb2upM+Hp6dntvl5AyD34J5uALjHrFixIsWZvZPuoU0aJtqyZUu5ublp+PDhyXrAkp6f1Gt18/ESEhI0YcKEZMf38/NLcbh5Uki9eWkrSapSpYpKlSql//znP46hpDc7ffp0qq/RlKJFi6pSpUr6/PPPnerdvn27fvzxR6c1g1N7XVL6lwxL4uvrq1atWmnhwoWaOHGi/Pz8nGYBr1+/vry8vPTBBx84XYvPPvtMsbGxatasmVNdKV2Htm3b6ujRo/rkk0+S7bt8+bIuXryYrlqlG8OyO3TooJkzZ2rKlCmKiopyWt/7ZuPHj3f6+sMPP5QkNWnSRJLUunVrubu7KyYmJtnn1rIsnT17NtU66tevL09PT3344YdOzx07dmy6X4ubm5vatGmjefPm6YsvvtD169edhpZLSlaDl5eXIiMjZVmW0zDnW7Vp00ZRUVEaOXKk1q1bl2z/hQsXUgz4t0rvtUvp+zU2NlaTJ09O9jw/P78UP7u3euihhxQcHKxJkyY5LU+2aNEi7dq1y+mzlxXOnTunDh06KDEx0em9Sem1bdiwIdn7mrSm+a2vLTg4WHXq1NFHH33k+MPjzVzx8wZA7kFPNwDcY3r16qVLly6pVatWioiIUEJCgtauXauvv/5aYWFhjkmaSpcurTfeeENvvvmmatasqdatW8vb21sbN25USEiIRo8ererVqyt//vzq3LmzevfuLZvNpi+++CLFUF+lShV9/fXXevnllxUdHS1/f381b95cpUqVUr58+TRp0iTlzZtXfn5+evjhhxUeHq5PP/1UTZo0Ufny5dW1a1cVK1ZMR48e1YoVKxQQEKB58+bd7bdP//73v9WkSRNVq1ZNzz77rGPJsMDAQKf1fqtUqSLpxnJp7du3l6enp5o3by4/P790Lxl2s2eeeUZTp07V4sWL9fTTTzvdN16oUCENHDhQMTExaty4sVq0aKE9e/ZowoQJio6OdprALLXr0LFjR82cOVMvvfSSVqxYoRo1aigxMVG7d+/WzJkztXjx4lQnmEtJp06d9MEHH2jFihV6++23U2138OBBtWjRQo0bN9a6dev05Zdf6qmnnnIM3y5VqpRGjBihgQMH6tChQ2rZsqXy5s2rgwcPas6cOXrhhRec1su+WdIa0KNHj9Zjjz2mpk2b6rffftOiRYscy7ilR7t27fThhx9q6NChioqKUrly5Zz2N2zYUEWKFFGNGjVUuHBh7dq1S+PGjVOzZs3SnCjN09NTs2fPVv369VWrVi21bdtWNWrUkKenp3bs2KFp06Ypf/78TutRpyS9165hw4by8vJS8+bN9eKLLyo+Pl6ffPKJgoODkwXNKlWqaOLEiRoxYoRKly6t4ODgFO/J9/T01Ntvv62uXbuqdu3a6tChg2PJsLCwsGRLx2XE3r179eWXX8qyLMXFxWnr1q2aNWuW4uPj9d5776lx48aOto899phmz56tVq1aqVmzZjp48KAmTZqkyMhIpz/a+fr6KjIyUl9//bXKli2roKAgVahQQRUqVND48eP1yCOPKCoqSs8//7x
KliypkydPat26dfrrr7+0devWTL8WAPe4uz9hOgDAlRYtWmR169bNioiIsPz9/S0vLy+rdOnSVq9evayTJ08ma//f//7Xqly5suXt7W3lz5/fql27trVkyRLH/jVr1lhVq1a1fH19rZCQEMcSZLplSaz4+HjrqaeesvLly2dJclq26rvvvrMiIyMtDw+PZEt0/fbbb1br1q2tAgUKWN7e3lZoaKjVtm1ba9myZY42SctPnT59Ol3vQdKyR7NmzUqzXUpLhlmWZS1dutSqUaOG5evrawUEBFjNmze3du7cmez5b775plWsWDHLzc3NafmwjCwZluT69etW0aJFLUnWwoULU2wzbtw4KyIiwvL09LQKFy5sde/e3Tp//rxTm7SuQ0JCgvX2229b5cuXd1zvKlWqWDExMVZsbGyy86W1/JJl3Vh2ys3Nzfrrr7+S7Ut6D3bu3Gm1adPGyps3r5U/f36rZ8+eTktPJfn222+tRx55xPLz87P8/PysiIgIq0ePHtaePXscbW5dMsyybiyvFhMTYxUtWtTy9fW16tSpY23fvt0KDQ297ZJhSex2u1W8eHFLkjVixIhk+z/66COrVq1ajs9oqVKlrP79+6f4nqXk/Pnz1pAhQ6yoqCgrT548lo+Pj1WhQgVr4MCB1vHjxx3tateunerSZOm9dt9//71VsWJFy8fHxwoLC7Pefvtt67///W+y5e1OnDhhNWvWzMqbN68lybF82K1LhiX5+uuvHT8ngoKCrKeffjrZdU/p+ljW/38WbibJ8XBzc7Py5ctnVa5c2erTp4+1Y8eOZMew2+3WqFGjrNDQUMvb29uqXLmyNX/+fKtz587Jlshbu3atVaVKFcvLyyvZ53f//v1Wp06drCJFilienp5WsWLFrMcee8z65ptvUnzfASA9bJaVQncEAADAHapcubKCgoK0bNmyZPuGDRummJgYnT59OkO9zgAA5DTc0w0AALLcr7/+qi1btqhTp06uLgUAAJfinm4AAJBltm/frk2bNundd99V0aJFk006BgDAvYaebgAAkGW++eYbde3aVdeuXdP06dNTXS4LAIB7Bfd0AwAAAABgCD3dAAAAAAAYQugGAAAAAMAQJlLLpex2u44dO6a8efPKZrO5uhwAAAAAyFUsy9KFCxcUEhIiN7fU+7MJ3bnUsWPHVLx4cVeXAQAAAAC52p9//qn77rsv1f2E7lwqb968km58AAICAlxcDQAAAADkLnFxcSpevLgje6WG0J1LJQ0pDwgIIHQDAAAAgCG3u52XidQAAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQD1cXAOR05+IT1P7jtTp1IUHBeb0044XqCvL3cnVZyKUuJyRq1MKdOnT2ksIK5NHrTSPl6+Xu6rIAAACQCpf2dI8ePVrR0dHKmzevgoOD1bJlS+3Zs8epzZUrV9SjRw8VKFBA/v7+euKJJ3Ty5EmnNr1791aVKlXk7e2tSpUqJTvPlStX1KVLF0VFRcnDw0MtW7ZMd42zZs1SRESEfHx8FBUVpYULFzrtHzZsmCIiIuTn56f8+fOrfv362rBhw22Pe+TIETVr1kx58uRRcHCw+vfvr+vXrzv2z549Ww0aNFChQoUUEBCgatWqafHixemuG3dH9IglenDEEu09dVF/X76mvacu6sERSxQ9YomrS0Mu9PzUjSo35Ad9sf6IVu87oy/WH1G5IT/o+akbXV0aAAAAUuHS0L1q1Sr16NFD69ev15IlS3Tt2jU1bNhQFy9edLTp16+f5s2bp1mzZmnVqlU6duyYWrdunexY3bp1U7t27VI8T2Jionx9fdW7d2/Vr18/3fWtXbtWHTp00LPPPqvffvtNLVu2VMu
WLbV9+3ZHm7Jly2rcuHHatm2bfv75Z4WFhalhw4Y6ffp0qsdNTExUs2bNlJCQoLVr1+rzzz/XlClTNGTIEEebn376SQ0aNNDChQu1adMm1a1bV82bN9dvv/2W7vphVvSIJTodn5DivtPxCQRvZKnnp27Ukp2nUty3ZOcpgjcAAEA2ZbMsy3J1EUlOnz6t4OBgrVq1SrVq1VJsbKwKFSqkadOmqU2bNpKk3bt3q1y5clq3bp2qVq3q9Pxhw4Zp7ty52rJlS6rn6NKli/7++2/NnTv3tvW0a9dOFy9e1Pz58x3bqlatqkqVKmnSpEkpPicuLk6BgYFaunSp6tWrl2KbRYsW6bHHHtOxY8dUuHBhSdKkSZM0YMAAnT59Wl5eKQ9NLl++vNq1a+cUzlOTVEdsbKwCAgJu2x4Zcy4+QQ+mI1RvHtSAoea4Y5cTElVuyA+3bbdreGOGmgMAANwl6c1c2WoitdjYWElSUFCQJGnTpk26du2aU+90RESESpQooXXr1hmvZ926dcl6xhs1apTquRMSEvTxxx8rMDBQDzzwQJrHjYqKcgTupOPGxcVpx44dKT7HbrfrwoULjvfmVlevXlVcXJzTA+a0/3htlrYD0jJq4c4sbQcAAIC7J9uEbrvdrr59+6pGjRqqUKGCJOnEiRPy8vJSvnz5nNoWLlxYJ06cMF7TiRMnnIJxaueeP3++/P395ePjozFjxmjJkiUqWLBgho+btC8l//nPfxQfH6+2bdumuH/06NEKDAx0PIoXL37b14fMO3Uh5WHlmW0HpOXQ2UtZ2g4AAAB3T7YJ3T169ND27ds1Y8aMu37uI0eOyN/f3/EYNWpUhp5ft25dbdmyRWvXrlXjxo3Vtm1bnTp1497LJk2aOI5bvnz5TNU3bdo0xcTEaObMmQoODk6xzcCBAxUbG+t4/Pnnn5k6F9InOG/6hoyntx2QlrACebK0HQAAAO6ebLFkWM+ePTV//nz99NNPuu+++xzbixQpooSEBP39999Ovd0nT55UkSJFsuz8ISEhTveBJw3hLlKkSLKZ0lM6t5+fn0qXLq3SpUuratWqKlOmjD777DMNHDhQn376qS5fvixJ8vT0dBz3l19+SXbcpH03mzFjhp577jnNmjUrzUngvL295e3tnYFXjTsx44Xq6bqne8YL1e9CNcjtXm8aqS/WH0lXOwAAAGQvLu3ptixLPXv21Jw5c7R8+XKFh4c77a9SpYo8PT21bNkyx7Y9e/boyJEjqlatWpbV4eHh4QjNpUuXdoTuatWqOZ1bkpYsWXLbc9vtdl29elWSVKxYMcdxQ0NDHcfdtm2bozc86bgBAQGKjPz/X5qnT5+url27avr06WrWrFmWvFZkjSB/LxW6zQRphfy9mEQNWcLXy10NIlMe5ZKkQWQwk6gBAABkQy7t6e7Ro4emTZum7777Tnnz5nXczxwYGChfX18FBgbq2Wef1csvv6ygoCAFBASoV69eqlatmtPM5X/88Yfi4+N14sQJXb582dFrHRkZ6ZgJfOfOnUpISNC5c+d04cIFR5uU1vVO0qdPH9WuXVvvvvuumjVrphkzZujXX3/Vxx9/LEm6ePGiRo4cqRYtWqho0aI6c+aMxo8fr6NHj+rJJ59M9bgNGzZUZGSkOnbsqHfeeUcnTpzQoEGD1KNHD0dv9bRp09S5c2e9//77evjhhx3vTdL7AtfbOKhBqsuGFfL30sZBDVxQFXKrTzpFp7psWIPIYH3SKdoFVQEAAOB2XLpkmM1mS3H75MmT1aVLF0nSlStX9Morr2j69Om6evWqGjVqpAkTJjgNw65Tp45WrVqV7DgHDx5UWFiYJCksLEyHDx9O1uZ2L3/WrFkaNGiQDh06pDJlyuidd95R06ZNHbU99dRT2rBhg86cOaMCBQooOjpagwYNUnR02r8AHz58WN27d9fKlSvl5+enzp0766233pKHh0ear6lz586aMmVKmseWWDLsbjoXn6D2H6/VqQsJCs7
rpRkvVKeHG8ZcTkjUqIU7dejsJYUVyKPXm0bSww0AAOAC6c1c2WqdbmQdQjcAAAAAmJMj1+kGAAAAACA3IXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAAD
AEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDMh269+/fr0GDBqlDhw46deqUJGnRokXasWNHlhUHAAAAAEBOlqnQvWrVKkVFRWnDhg2aPXu24uPjJUlbt27V0KFDs7RAAAAAAAByqkyF7tdee00jRozQkiVL5OXl5dj+6KOPav369VlWHAAAAAAAOVmmQve2bdvUqlWrZNuDg4N15syZOy4KAAAAAIDcIFOhO1++fDp+/Hiy7b/99puKFSt2x0UBAAAAAJAbZCp0t2/fXgMGDNCJEydks9lkt9u1Zs0avfrqq+rUqVNW1wgAAAAAQI6UqdA9atQoRUREqHjx4oqPj1dkZKRq1aql6tWra9CgQVldIwAAAAAAOZLNsiwrs0/+888/tW3bNsXHx6ty5coqU6ZMVtaGOxAXF6fAwEDFxsYqICDA1eUAAAAAQK6S3syVqZ7u4cOH69KlSypevLiaNm2qtm3bqkyZMrp8+bKGDx+e6aIBAAAAAMhNMtXT7e7uruPHjys4ONhp+9mzZxUcHKzExMQsKxCZQ083AAAAAJhjtKfbsizZbLZk27du3aqgoKDMHBIAAAAAgFzHIyON8+fPL5vNJpvNprJlyzoF78TERMXHx+ull17K8iIBAAAAAMiJMhS6x44dK8uy1K1bN8XExCgwMNCxz8vLS2FhYapWrVqWFwkAAAAAQE6UodDduXNnSVJ4eLiqV68uT09PI0UBAAAAAJAbZCh0J6ldu7bj31euXFFCQoLTfibuAgAAAAAgkxOpXbp0ST179lRwcLD8/PyUP39+pwcAAAAAAMhk6O7fv7+WL1+uiRMnytvbW59++qliYmIUEhKiqVOnZnWNAAAAAADkSJkaXj5v3jxNnTpVderUUdeuXVWzZk2VLl1aoaGh+uqrr/T0009ndZ0AAAAAAOQ4merpPnfunEqWLCnpxv3b586dkyQ98sgj+umnn7KuOgAAAAAAcrBMhe6SJUvq4MGDkqSIiAjNnDlT0o0e8Hz58mVZcQAAAAAA5GSZCt1du3bV1q1bJUmvvfaaxo8fLx8fH/Xr10/9+/fP0gIBAAAAAMipbJZlWXd6kMOHD2vTpk0qXbq0KlasmBV14Q7FxcUpMDBQsbGxLOEGAAAAAFksvZkrUxOp3So0NFShoaFZcSgAAAAAAHKNDIduu92uKVOmaPbs2Tp06JBsNpvCw8PVpk0bdezYUTabzUSdAAAAAADkOBm6p9uyLLVo0ULPPfecjh49qqioKJUvX16HDx9Wly5d1KpVK1N1AgAAAACQ42Sop3vKlCn66aeftGzZMtWtW9dp3/Lly9WyZUtNnTpVnTp1ytIiAQAAAADIiTLU0z19+nS9/vrryQK3JD366KN67bXX9NVXX2VZcQAAAAAA5GQZCt2///67GjdunOr+Jk2aOJYSAwAAAADgXpeh0H3u3DkVLlw41f2FCxfW+fPn77goAAAAAABygwyF7sTERHl4pH4buLu7u65fv37HRQEAAAAAkBtkaCI1y7LUpUsXeXt7p7j/6tWrWVIUAAAAAAC5QYZCd+fOnW/bhpnLAQAAAAC4IUOhe/LkyabqAAAAAAAg18nQPd0AAAAAACD9MtTTneTixYt66623tGzZMp06dUp2u91p/4EDB7KkOAAAAAAAcrJMhe7nnntOq1atUseOHVW0aFHZbLasrgsAAAAAgBwvU6F70aJFWrBggWrUqJHV9QAAAAAAkGtk6p7u/PnzKygoKKtrAQAAAAAgV8lU6H7zzTc1ZMgQXbp0KavrAQAAAAAg18jU8PJ3331X+/fvV+HChRUWFiZPT0+n/Zs3b86S4gAAAAAAyMkyFbpbtmyZxWUAAAAAAJD72CzLslxdBLJeXFycAgMDFRsbq4CAAFeXAwAAAAC5SnozV6Z6upNs2rRJu3btkiSVL19elStXvpPDAQAAAACQq2QqdJ86dUrt27fXypUrlS9
fPknS33//rbp162rGjBkqVKhQVtYIAAAAAECOlKnZy3v16qULFy5ox44dOnfunM6dO6ft27crLi5OvXv3zuoaAQAAAADIkTJ1T3dgYKCWLl2q6Ohop+2//PKLGjZsqL///jur6kMmcU83AAAAAJiT3syVqZ5uu92ebJkwSfL09JTdbs/MIQEAAAAAyHUyFbofffRR9enTR8eOHXNsO3r0qPr166d69eplWXEAAAAAAORkmQrd48aNU1xcnMLCwlSqVCmVKlVK4eHhiouL04cffpjVNQIAAAAAkCNlavby4sWLa/PmzVq6dKl2794tSSpXrpzq16+fpcUBAAAAAJCTZWoiNWR/TKQGAAAAAOakN3Olu6f7gw8+0AsvvCAfHx998MEHabZl2TAAAAAAADLQ0x0eHq5ff/1VBQoUUHh4eOoHtNl04MCBLCsQmUNPNwAAAACYk+U93QcPHkzx3wAAAAAAIGWZmr18+PDhunTpUrLtly9f1vDhw++4KAAAAAAAcoNMTaTm7u6u48ePKzg42Gn72bNnFRwcrMTExCwrEJnD8HIAAAAAMCe9mStTPd2WZclmsyXbvnXrVgUFBWXmkAAAAAAA5DoZWqc7f/78stlsstlsKlu2rFPwTkxMVHx8vF566aUsLxIAAAAAgJwoQ6F77NixsixL3bp1U0xMjAIDAx37vLy8FBYWpmrVqmV5kQAAAAAA5EQZCt2dO3eWdGP5sOrVq8vT09NIUQAAAAAA5AYZCt1Jateu7fj3lStXlJCQ4LSfibsAAAAAAMjkRGqXLl1Sz549FRwcLD8/P+XPn9/pAQAAAAAAMhm6+/fvr+XLl2vixIny9vbWp59+qpiYGIWEhGjq1KlZXSMAAAAAADlSpoaXz5s3T1OnTlWdOnXUtWtX1axZU6VLl1ZoaKi++uorPf3001ldJwAAAAAAOU6merrPnTunkiVLSrpx//a5c+ckSY888oh++umnrKsOAAAAAIAcLFOhu2TJkjp48KAkKSIiQjNnzpR0owc8X758WVYcAAAAAAA5WaZCd9euXbV161ZJ0muvvabx48fLx8dH/fr1U//+/bO0QAAAAAAAciqbZVnWnR7k8OHD2rRpk0qXLq2KFStmRV24Q3FxcQoMDFRsbCxLuAEAAABAFktv5srURGo3u3LlikJDQxUaGnqnhwIAAAAAIFfJ1PDyxMREvfnmmypWrJj8/f114MABSdLgwYP12WefZWmBAAAAAADkVJkK3SNHjtSUKVP0zjvvyMvLy7G9QoUK+vTTT7OsOAAAAAAAcrJMhe6pU6fq448/1tNPPy13d3fH9gceeEC7d+/OsuIAAAAAAMjJMhW6jx49qtKlSyfbbrfbde3atTsuCgAAAACA3CBToTsyMlKrV69Otv2bb75R5cqV77goAAAAAAByg0zNXj5kyBB17txZR48eld1u1+zZs7Vnzx5NnTpV8+fPz+oaAQAAAADIkTLV0/34449r3rx5Wrp0qfz8/DRkyBDt2rVL8+bNU4MGDbK6RgAAAAAAcqQM93Rfv35do0aNUrdu3bRkyRITNQEAAAAAkCtkuKfbw8ND77zzjq5fv26iHgAAAAAAco1MDS+vV6+eVq1aldW1AAAAAACQq2RqIrUmTZrotdde07Zt21SlShX5+fk57W/RokWWFAcAAAAAQE5msyzLyuiT3NxS7yC32WxKTEy8o6Jw5+Li4hQYGKjY2FgFBAS4uhwAAAAAyFXSm7ky1dNtt9szXRgAAAAAAPeKDN3TvXz5ckVGRiouLi7ZvtjYWJUvX16rV6/OsuIAAAAAAMjJMhS6x44dq+effz7FrvPAwEC9+OKLeu+997KsOAAAAAAAcrIMhe6tW7eqcePGqe5v2LChNm3adMdFAQAAAACQG2QodJ88eVKenp6p7vfw8NDp06fvuCgAAAAAAHKDDIXuYsWKafv27anu//3331W0aNE7LgoAAAAAgNwgQ6G7adOmGjx4sK5cuZJs3+X
LlzV06FA99thjWVYcAAAAAAA5WYbW6T558qQefPBBubu7q2fPnrr//vslSbt379b48eOVmJiozZs3q3DhwsYKRvqwTjcAAAAAmGNkne7ChQtr7dq16t69uwYOHKikvG6z2dSoUSONHz+ewA0AAAAAwP9kKHRLUmhoqBYuXKjz58/rjz/+kGVZKlOmjPLnz2+iPgAAAAAAcqwMh+4k+fPnV3R0dFbWAgAAAABArpKhidQAAAAAAED6EboBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwj
dAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBCXhu7Ro0crOjpaefPmVXBwsFq2bKk9e/Y4tbly5Yp69OihAgUKyN/fX0888YROnjzp1KZ3796qUqWKvL29ValSpWTnuXLlirp06aKoqCh5eHioZcuW6a5x1qxZioiIkI+Pj6KiorRw4UKn/cOGDVNERIT8/PyUP39+1a9fXxs2bLjtcY8cOaJmzZopT548Cg4OVv/+/XX9+nXH/uPHj+upp55S2bJl5ebmpr59+6a75uwu4bpdn60+oCHfbddnqw8o4brd1SXdkbDXFiR7AKas33vW6bO2fu9ZV5cEAACyiUS7pXX7z+q7LUe1bv9ZJdotV5d0Ry4nJGrw3G3q+NkGDZ67TZcTEl1dUqZ4uPLkq1atUo8ePRQdHa3r16/r9ddfV8OGDbVz5075+flJkvr166cFCxZo1qxZCgwMVM+ePdW6dWutWbPG6VjdunXThg0b9Pvvvyc7T2Jionx9fdW7d299++236a5v7dq16tChg0aPHq3HHntM06ZNU8uWLbV582ZVqFBBklS2bFmNGzdOJUuW1OXLlzVmzBg1bNhQf/zxhwoVKpTicRMTE9WsWTMVKVJEa9eu1fHjx9WpUyd5enpq1KhRkqSrV6+qUKFCGjRokMaMGZPumrO70Qt36pPVB3Xz9//Ihbv0fM1wDWwa6brCMim1gB322gIdeqvZXa4GuV1Kn7f2/10vSXzeAAC4x/2w/bhi5u3U8dgrjm1FA300tHmkGlco6sLKMuf5qRu1ZOcpx9er90lfrD+iBpHB+qRTtAsryzibZVnZ5s8fp0+fVnBwsFatWqVatWopNjZWhQoV0rRp09SmTRtJ0u7du1WuXDmtW7dOVatWdXr+sGHDNHfuXG3ZsiXVc3Tp0kV///235s6de9t62rVrp4sXL2r+/PmObVWrVlWlSpU0adKkFJ8TFxenwMBALV26VPXq1UuxzaJFi/TYY4/p2LFjKly4sCRp0qRJGjBggE6fPi0vLy+n9nXq1FGlSpU0duzY29Z8ax2xsbEKCAhI9/NMGr1wpz766WCq+1+slbOCd3p6tAlCyCp83gAAQGp+2H5c3b/crFuDne1//534zIM5KnjfGrhvlV2Cd3ozV7a6pzs2NlaSFBQUJEnatGmTrl27pvr16zvaREREqESJElq3bp3xetatW+d0bklq1KhRqudOSEjQxx9/rMDAQD3wwANpHjcqKsoRuJOOGxcXpx07dmRN8dlMwnW7PlmdeuCWpE9WH8wxQ83TO4ScoebICukdQs5QcwAA7j2Jdksx83YmC9ySHNti5u3MMUPNLyckphm4JWnJzlM5aqh5tgnddrtdffv2VY0aNRxDt0+cOCEvLy/ly5fPqW3hwoV14sQJ4zWdOHHCKRindu758+fL399fPj4+GjNmjJYsWaKCBQtm+LhJ+zLj6tWriouLc3pkJ1+sO6TbfZ/brRvtADhLGkKeVe0AAEDu8cvBc05Dym9lSToee0W/HDx394q6A6MW7szSdtlBtgndPXr00Pbt2zVjxoy7fu4jR47I39/f8Ui6rzq96tatqy1btmjt2rVq3Lix2rZtq1Onbvx1pkmTJo7jli9f3kT5km5MShcYGOh4FC9e3Ni5MuPwuUtZ2g4AAACAdOpC6oE7M+1c7dDZ9OWB9LbLDlw6kVqSnj17av78+frpp5903333ObYXKVJECQkJ+vvvv516u0+ePKkiRYpk2flDQkKc7gNPGt5epEiRZDOlp3RuPz8/lS5dWqVLl1bVqlVVpkwZffbZZxo4cKA+/fRTXb58WZLk6enpOO4vv/yS7Lh
J+zJj4MCBevnllx1fx8XFZavgHRqUJ0vbAQAAAJCC8/pkaTtXCyuQR6v3pa9dTuHSnm7LstSzZ0/NmTNHy5cvV3h4uNP+KlWqyNPTU8uWLXNs27Nnj44cOaJq1aplWR0eHh6O0Fy6dGlH6K5WrZrTuSVpyZIltz233W7X1atXJUnFihVzHDc0NNRx3G3btjl6w5OOGxAQoMjIzE0k5u3trYCAAKdHdtKxWpjcbGm3cbPdaAfA2YxuVW/fKAPtAABA7vGP8CAVDfRRar9q23RjFvN/hAfdzbIy7fV0Tqyc3nbZgUtDd48ePfTll19q2rRpyps3r06cOKETJ044eoYDAwP17LPP6uWXX9aKFSu0adMmde3aVdWqVXOaufyPP/7Qli1bHM/dsmWLtmzZooSEBEebnTt3asuWLTp37pxiY2MdbdLSp08f/fDDD3r33Xe1e/duDRs2TL/++qt69uwpSbp48aJef/11rV+/XocPH9amTZvUrVs3HT16VE8++WSqx23YsKEiIyPVsWNHbd26VYsXL9agQYPUo0cPeXt7O9ol1RgfH6/Tp09ry5Yt2rkz59y7cDMvDzc9XzM8zTbP1wyXl0e2ueMhTemdJZrZpJEVqpYtkKXtAABA7uHuZtPQ5jcC6K3BO+nroc0j5X67HrBswtfLXQ0ig9Ns0yAyWL5e7nepojvn0iXDbLaUL/zkyZPVpUsXSdKVK1f0yiuvaPr06bp69aoaNWqkCRMmOA3DrlOnjlatWpXsOAcPHlRYWJgkKSwsTIcPH07W5nYvf9asWRo0aJAOHTqkMmXK6J133lHTpk0dtT311FPasGGDzpw5owIFCig6OlqDBg1SdHTaU9gfPnxY3bt318qVK+Xn56fOnTvrrbfekofH/4/4T+n9CQ0N1aFDh9I8tpQ9lwyTUl6n282mXLdOt0TgRtbj8wYAAFKT29fpTpJdlguT0p+5stU63cg62TV0SzeWD/ti3SEdPndJoUF51LFaWI7p4U5JSkGIAART1u896zRL+YxuVenhBgAAkm4sH/bLwXM6deGKgvPeGFKeU3q4U3I5IVGjFu7UobOXFFYgj15vGpmtergJ3fe47By6AQAAACCnS2/myrndiwAAAAAAZHOEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIZ4uLoAmGFZliQpLi7OxZUAAAAAQO6TlLWSsldqCN251IULFyRJxYsXd3ElAAAAAJB7XbhwQYGBganut1m3i+XIkex2u44dO6a8efPKZrO5upxcLy4uTsWLF9eff/6pgIAAV5eDW3B9sjeuT/bHNcreuD7ZG9cn++MaZW/Z+fpYlqULFy4oJCREbm6p37lNT3cu5ebmpvvuu8/VZdxzAgICst0PA/w/rk/2xvXJ/rhG2RvXJ3vj+mR/XKPsLbten7R6uJMwkRoAAAAAAIYQugEAAAAAMITQDWQBb29vDR06VN7e3q4uBSng+mRvXJ/sj2uUvXF9sjeuT/bHNcrecsP1YSI1AAAAAAAMoacbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCN5BJo0ePVnR0tPLmzavg4GC1bNlSe/bscXVZSMVbb70lm82mvn37uroU3OTo0aN65plnVKBAAfn6+ioqKkq//vqrq8uCpMTERA0ePFjh4eHy9fVVqVKl9Oabb4qpYFznp59+UvPmzRUSEiKbzaa5c+c67bcsS0OGDFHRokXl6+ur+vXra9++fa4p9h6U1vW5du2aBgwYoKioKPn5+SkkJESdOnXSsWPHXFfwPeZ23z83e+mll2Sz2TR27Ni7Vh/Sd4127dqlFi1aKDAwUH5+foqOjtaRI0fufrEZROgGMmnVqlXq0aOH1q9fryVLlujatWt
q2LChLl686OrScIuNGzfqo48+UsWKFV1dCm5y/vx51ahRQ56enlq0aJF27typd999V/nz53d1aZD09ttva+LEiRo3bpx27dqlt99+W++8844+/PBDV5d2z7p48aIeeOABjR8/PsX977zzjj744ANNmjRJGzZskJ+fnxo1aqQrV67c5UrvTWldn0uXLmnz5s0aPHiwNm/erNmzZ2vPnj1q0aKFCyq9N93u+yfJnDlztH79eoWEhNylypDkdtdo//79euSRRxQREaGVK1fq999/1+DBg+Xj43OXK804Zi8Hssjp06cVHBysVatWqVatWq4uB/8THx+vBx98UBMmTNCIESNUqVIl/nKdTbz22mtas2aNVq9e7epSkILHHntMhQsX1meffebY9sQTT8jX11dffvmlCyuDJNlsNs2ZM0ctW7aUdKOXOyQkRK+88opeffVVSVJsbKwKFy6sKVOmqH379i6s9t5z6/VJycaNG/WPf/xDhw8fVokSJe5ecUj1+hw9elQPP/ywFi9erGbNmqlv376MkHORlK5R+/bt5enpqS+++MJ1hWUSPd1AFomNjZUkBQUFubgS3KxHjx5q1qyZ6tev7+pScIvvv/9eDz30kJ588kkFBwercuXK+uSTT1xdFv6nevXqWrZsmfbu3StJ2rp1q37++Wc1adLExZUhJQcPHtSJEyecftYFBgbq4Ycf1rp161xYGVITGxsrm82mfPnyuboUSLLb7erYsaP69++v8uXLu7oc3MJut2vBggUqW7asGjVqpODgYD388MNp3iaQnRC6gSxgt9vVt29f1ahRQxUqVHB1OfifGTNmaPPmzRo9erSrS0EKDhw4oIkTJ6pMmTJavHixunfvrt69e+vzzz93dWnQjZEI7du3V0REhDw9PVW5cmX17dtXTz/9tKtLQwpOnDghSSpcuLDT9sKFCzv2Ifu4cuWKBgwYoA4dOiggIMDV5UA3bqnx8PBQ7969XV0KUnDq1CnFx8frrbfeUuPGjfXjjz+qVatWat26tVatWuXq8m7Lw9UFALlBjx49tH37dv3888+uLgX/8+eff6pPnz5asmRJjrjX515kt9v10EMPadSoUZKkypUra/v27Zo0aZI6d+7s4uowc+ZMffXVV5o2bZrKly+vLVu2qG/fvgoJCeH6AHfg2rVratu2rSzL0sSJE11dDiRt2rRJ77//vjZv3iybzebqcpACu90uSXr88cfVr18/SVKlSpW0du1aTZo0SbVr13ZlebdFTzdwh3r27Kn58+drxYoVuu+++1xdDv5n06ZNOnXqlB588EF5eHjIw8NDq1at0gcffCAPDw8lJia6usR7XtGiRRUZGem0rVy5cjliFtJ7Qf/+/R293VFRUerYsaP69evHyJFsqkiRIpKkkydPOm0/efKkYx9cLylwHz58WEuWLKGXO5tYvXq1Tp06pRIlSjh+Zzh8+LBeeeUVhYWFubo8SCpYsKA8PDxy7O8N9HQDmWRZlnr16qU5c+Zo5cqVCg8Pd3VJuEm9evW0bds2p21du3ZVRESEBgwYIHd3dxdVhiQ1atRItsze3r17FRoa6qKKcLNLly7Jzc35b/Pu7u6O3gZkL+Hh4SpSpIiWLVumSpUqSZLi4uK0YcMGde/e3bXFQdL/B+59+/ZpxYoVKlCggKtLwv907Ngx2dwvjRo1UseOHdW1a1cXVYWbeXl5KTo6Osf+3kDoBjKpR48emjZtmr777jvlzZvXcc9cYGCgfH19XVwd8ubNm+z+ej8/PxUoUID77rOJfv36qXr16ho1apTatm2rX375RR9//LE+/vhjV5cGSc2bN9fIkSNVokQJlS9fXr/99pvee+89devWzdWl3bPi4+P1xx9/OL4+ePCgtmzZoqCgIJUoUUJ9+/bViBEjVKZMGYWHh2vw4MEKCQlJcwZtZJ20rk/RokXVpk0bbd68WfPnz1diYqLj94agoCB5eXm5qux7xu2+f279I4inp6eKFCmi+++
//26Xes+63TXq37+/2rVrp1q1aqlu3br64YcfNG/ePK1cudJ1RaeXBSBTJKX4mDx5sqtLQypq165t9enTx9Vl4Cbz5s2zKlSoYHl7e1sRERHWxx9/7OqS8D9xcXFWnz59rBIlSlg+Pj5WyZIlrTfeeMO6evWqq0u7Z61YsSLF/+907tzZsizLstvt1uDBg63ChQtb3t7eVr169aw9e/a4tuh7SFrX5+DBg6n+3rBixQpXl35PuN33z61CQ0OtMWPG3NUa73XpuUafffaZVbp0acvHx8d64IEHrLlz57qu4AxgnW4AAAAAAAxhIjUAAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAACAW4SFhWns2LGuLgMAkAsQugEAuAc1b95cjRs3TnHf6tWrZbPZ9Pvvv6d5jIwG00OHDslms6X5mDJlSgZeRebUqVNHffv2NX4eAAAkycPVBQAAgLvv2Wef1RNPPKG//vpL9913n9O+yZMn66GHHlLFihWz9JzFixfX8ePHHV//5z//0Q8//KClS5c6tgUGBmbpOQEAcDV6ugEAuAc99thjKlSoULKe5fj4eM2aNUvPPvusvv32W5UvX17e3t4KCwvTu+++62hXp04dHT58WP369XP0Uif5+eefVbNmTfn6+qp48eLq3bu3Ll68KHd3dxUpUsTx8Pf3l4eHh4oUKaIrV64oJCREO3bscKpn7NixCg0Nld1u18qVK2Wz2bRgwQJVrFhRPj4+qlq1qrZv3+70nNTOn5pTp06pefPm8vX1VXh4uL766qs7eGcBAHBG6AYA4B7k4eGhTp06acqUKbIsy7F91qxZSkxMVLly5dS2bVu1b99e27Zt07BhwzR48GBHSJ89e7buu+8+DR8+XMePH3f0YO/fv1+NGzfWE088od9//11ff/21fv75Z/Xs2TPNesLCwlS/fn1NnjzZafvkyZPVpUsXubn9/68s/fv317vvvquNGzeqUKFCat68ua5du5bp83fp0kV//vmnVqxYoW+++UYTJkzQqVOnMvR+AgCQGpt18/9pAQDAPWP37t0qV66cVqxYoTp16kiSatWq5ehZPn36tH788UdH+3/9619asGCBozc6LCxMffv2dbo/+rnnnpO7u7s++ugjx7aff/5ZtWvX1sWLF+Xj4+PYPmzYMM2dO1dbtmyRJM2cOVMvvfSSjh8/Lm9vb23evFkPPfSQDhw4oLCwMK1cuVJ169bVjBkz1K5dO0nSuXPndN9992nKlClq27Ztus5fp04dVapUSWPHjtXevXt1//3365dfflF0dLTT+zJmzBju/QYA3DF6ugEAuEdFRESoevXq+u9//ytJ+uOPP7R69Wo9++yz2rVrl2rUqOHUvkaNGtq3b58SExNTPebWrVs1ZcoU+fv7Ox6NGjWS3W7XwYMH06ynZcuWcnd315w5cyRJU6ZMUd26dRUWFubUrlq1ao5/BwUF6f7779euXbsydf5du3bJw8NDVapUcXpf8uXLl2atAACkFxOpAQBwD3v22WfVq1cvjR8/XpMnT1apUqVUu3btTB8vPj5eL774onr37p1sX4kSJdJ8rpeXlzp16qTJkyerdevWmjZtmt5///27dn4AAEwgdAMAcA9r27at+vTpo2nTpmnq1Knq3r27bDabypUrpzVr1ji1XbNmjcqWLSt3d3dJN0Lyrb3eDz74oHbu3KnSpUtnqp7nnntOFSpU0IQJE3T9+nW1bt06WZv169c7AvT58+e1d+9elStXLlPnj4iI0PXr17Vp0ybH8PI9e/bo77//zlT9AADciuHlAADcw/z9/dWuXTsNHDhQx48fV5cuXSRJr7zyipYtW6Y333xTe/fu1eeff65x48bp1VdfdTw3LCxMP/30k44ePaozZ85IkgYMGKC1a9eqZ8+e2rJli/bt26fvvvvuthOpJSlXrpyqVq2qAQM
GqEOHDvL19U3WZvjw4Vq2bJm2b9+uLl26qGDBgmrZsmWmzn///fercePGevHFF7VhwwZt2rRJzz33XIrnBQAgMwjdAADc45599lmdP39ejRo1UkhIiKQbPcYzZ87UjBkzVKFCBQ0ZMkTDhw93hHLpRvg9dOiQSpUqpUKFCkmSKlasqFWrVmnv3r2qWbOmKleurCFDhjiOm956EhIS1K1btxT3v/XWW+rTp4+qVKmiEydOaN68efLy8sr0+SdPnqyQkBDVrl1brVu31gsvvKDg4OB01wsAQFqYvRwAAGQrb775pmbNmqXff//daXvS7OXnz59nojMAQI5BTzcAAMgW4uPjtX37do0bN069evVydTkAAGQJQjcAAMgWevbsqSpVqqhOnTqpDi0HACCnYXg5AAAAAACG0NMNAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIf8Hn3rI71df6W8AAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Now let's plot a time series between creation date and vote type id\n", "\n", "plot = baseline(question=\"/plot a relation between vote type and creation date in scatter\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Super cool right? Remmember, all of these things are done locally in a single MacBook M3 Pro. We are loading two 1B parameter model and seeing the magic at the same levels of GPT-3.5 and so on. However PremSQL also supports closed models too. So if your data is not sensitive then you can surely go for those models as well. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Customization over PremSQL Agents\n", "\n", "You can customize lot of things in PremSQL. For starters, you can put any type of generators in PremSQL. Here we are\n", "using MLX. You can use huggingface or Prem AI SDK (which provides different models) or other APIs as well. You can also build your own worker from scratch. As you have seen here, that we are using MatplotLib tool, you can also make your seaboarn / Plotly tool for the same thing for more interactive visualization. \n", "\n", "You can put as many number of arguments in your custom agent constructors and workers. As long as it adheres with the output schema, you can enjoy other functionalities like AgentServer and Playground. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### A note about PremSQL Memory, Router and other limitations:\n", "\n", "Since this is the first version, where we are introducing agents and it's capabilities, so it comes with certain limitations as follows:\n", "\n", "1. Abscence of a Planner. In other words, we do not support \"multi-agent\" workflows. For example, after connecting to the database, if you directly ask something complex to plot, as of now, it will not able to plot things. 
In ideal case, it should \"plan\" what all things it needs to query, and then which columns needs to be used for plotting. However we are going to support multi-agent framework in coming versions. PRs are welcomed.\n", "\n", "2. Context handling in memory. Memory has also a very simple implementation. When you instantiate an agent to work with some \"session_name\" then it captures all the history and saves it inside a local \"sqlite\" database in the name of \"premsql_pipeline_memory.db\" (However you can change the path and name of the db). However, if you want to \"analyse\" or \"plot\" something over your previous output, the way it works is, it searches for the latest output dataframe and take that as an input and then output the plot or analysis. As of now, it can not understand history in a semantic sense. " ] } ], "metadata": { "kernelspec": { "display_name": "text2sql-jLjiS8B5-py3.11", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.10" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: examples/datasets.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/root/anindya/Submission/text2sql/text2sql\n" ] } ], "source": [ "cd .." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Datasets\n", "\n", "premsql datasets helps to use different already available and pre-processed datasets in a simple way. Since Text-to-SQL is a complex task and requires data which has a depdenency of database and tables. \n", "\n", "premsql datasets provides simple APIs to use those and also helps you to create your own dataset using your own private databases. 
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Currently the following datasets are readily available:\n", "\n", "1. [BirdBench Dataset](https://huggingface.co/datasets/premai-io/birdbench)\n", "2. [Spider Unified Datasets](https://huggingface.co/datasets/premai-io/spider)\n", "3. [Domains](https://huggingface.co/datasets/premai-io/domains)\n", "4. [Gretel AI Dataset](https://huggingface.co/datasets/gretelai/synthetic_text_to_sql) (A synthetic text to SQL dataset by Gretel AI)\n", "\n", "Now we are going to see how to use these datasets in a simple way." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/root/miniconda3/envs/deep/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", "2024-09-09 04:26:34,697 - [BIRD-DATASET] - INFO - Loaded Bird Dataset\n" ] } ], "source": [ "from premsql.datasets import Text2SQLDataset\n", "from premsql.utils import print_data\n", "# load the bird dataset\n", "\n", "bird_dataset = Text2SQLDataset(\n", " dataset_name='bird', split=\"train\", force_download=False,\n", " dataset_folder=\"/root/anindya/text2sql/data\"\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Currently, this is just the object which has the raw the data. This object consist of two methods: \n", "\n", "1. `raw_dataset`: This will return a dict containing the raw data opened form the json file. \n", "2. `filters_available`: This will return the list of filters available for the dataset.\n", "\n", "So for our train dataset here is how we can see the raw data." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'db_id': 'movie_platform',\n", " 'question': 'Name movie titles released in year 1945. 
Sort the listing by the descending order of movie popularity.',\n", " 'evidence': 'released in the year 1945 refers to movie_release_year = 1945;',\n", " 'SQL': 'SELECT movie_title FROM movies WHERE movie_release_year = 1945 ORDER BY movie_popularity DESC LIMIT 1'}" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_bird_training_dataset = bird_dataset.raw_dataset\n", "raw_bird_training_dataset[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now, we can also see what all filters are available for the dataset. You can simply use `.filters_available` to see the available filters." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['db_id']" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bird_dataset.filter_availables" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now, in order to load the processed dataset, you can simply call `setup_dataset` method. This will load the processed dataset and return the dataset object. \n", "\n", "This dataset has certain (optional) methods available for furthur customization:\n", "\n", "- filter_by: tuple | None: This will filter the dataset based on the given filter.\n", "\n", "- num_rows: int | None: This will return the number of rows from the dataset.\n", "\n", "- num_fewshot: int | None: This will determine how many few shot examples to create in the prompt\n", "\n", "- model_name_or_path: str | None: This will apply the prompt template of the model you choose. For example, if you want to finetune a llama model then it will wrap the prompt with the llama model prompt template.\n", "\n", "Also if this is not provided then it will not tokenize the dataset. \n", "\n", "- prompt_template: str | None: If you want to use any other kind of prompt template then you can provide that here. 
You can check out the default prompt template [here](/premsql/datasets/prompts.py). \n", "\n", "**Note**:\n", "If `model_name_or_path` is provided then it will automatically use the prompt template of that model and tokenize, otherwise it will not." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-09-09 04:26:49,099 - [BIRD-DATASET] - INFO - Setting up Bird Dataset\n", "Applying prompt: 100%|██████████| 3/3 [00:00<00:00, 1865.52it/s]\n", "2024-09-09 04:26:49,509 - [DATASET] - INFO - Casted dataset with model chat template\n", "2024-09-09 04:26:49,510 - [DATASET] - INFO - Starting Tokenization ...\n", "Tokenizing: 100%|██████████| 3/3 [00:00<00:00, 105.07it/s]\n", "Tokenizing: 100%|██████████| 3/3 [00:00<00:00, 179.29it/s]\n" ] }, { "data": { "text/plain": [ "{'input_ids': tensor([32013, 32013, 2042, ..., 207, 16, 32021]),\n", " 'labels': tensor([ -100, -100, -100, ..., 207, 16, 32021]),\n", " 'raw': {'db_id': 'movie_platform',\n", " 'question': 'Name movie titles released in year 1945. Sort the listing by the descending order of movie popularity.',\n", " 'evidence': 'released in the year 1945 refers to movie_release_year = 1945;',\n", " 'SQL': 'SELECT movie_title FROM movies WHERE movie_release_year = 1945 ORDER BY movie_popularity DESC LIMIT 1',\n", " 'db_path': '/root/anindya/text2sql/data/bird/train/train_databases/movie_platform/movie_platform.sqlite',\n", " 'prompt': '<|begin▁of▁sentence|>You are an AI programming assistant, utilizing the Deepseek Coder model, develo....tles released in year 1945. 
Sort the listing by the descending order of movie popularity.\\n\\n# SQL: \\n\\n'}}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Now let's setup the bird dataset \n", "\n", "bird_dataset = bird_dataset.setup_dataset(\n", " model_name_or_path=\"premai-io/prem-1B-SQL\", \n", " num_fewshot=3, \n", " num_rows=3\n", ")\n", "\n", "print_data(bird_dataset[0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Sometimes tokenization could be time consuming, and it could be computation heavt. So, you can also preview the dataset without even tokenizing first. Here is\n", "how you do it. " ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-09-09 04:27:12,344 - [BIRD-DATASET] - INFO - Loaded Bird Dataset\n", "2024-09-09 04:27:12,345 - [BIRD-DATASET] - INFO - Setting up Bird Dataset\n", "Applying prompt: 100%|██████████| 3/3 [00:00<00:00, 1908.24it/s]\n" ] }, { "data": { "text/plain": [ "{'db_id': 'movie_platform',\n", " 'question': 'Name movie titles released in year 1945. Sort the listing by the descending order of movie popularity.',\n", " 'evidence': 'released in the year 1945 refers to movie_release_year = 1945;',\n", " 'SQL': 'SELECT movie_title FROM movies WHERE movie_release_year = 1945 ORDER BY movie_popularity DESC LIMIT 1',\n", " 'db_path': '/root/anindya/text2sql/data/bird/train/train_databases/movie_platform/movie_platform.sqlite',\n", " 'prompt': '\\n# Follow these instruction:\\nYou will be given schemas of tables of a database. Your job is to write....itles released in year 1945. 
Sort the listing by the descending order of movie popularity.\\n\\n# SQL: \\n'}" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bird_dataset_without_tokenization = Text2SQLDataset(\n", " dataset_name='bird', split=\"train\", force_download=False,\n", " dataset_folder=\"/root/anindya/text2sql/data\"\n", ").setup_dataset(\n", " model_name_or_path=None, num_fewshot=3, num_rows=3\n", ")\n", "\n", "print_data(bird_dataset_without_tokenization[0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "BirdDataset has two instance, a `train` and `validation` instance. For train dataset, you can only filter by `db_id`. This will only return results which are belonging to that database id. \n", "\n", "For BirdDevDataset you can filter by `db_id` and `difficulty`. Here is how you load a validation dataset and then filter by `difficulty`. " ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-09-09 04:27:20,270 - [BIRD-DATASET] - INFO - Loaded Bird Dataset\n", "2024-09-09 04:27:20,271 - [BIRD-DATASET] - INFO - Setting up Bird Dataset\n", "Applying prompt: 100%|██████████| 100/100 [00:00<00:00, 2101.37it/s]\n" ] }, { "data": { "text/plain": [ "100" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the BirdBench dev dataset and filter the dataset by \n", "# difficulty\n", "\n", "bird_validation = Text2SQLDataset(\n", " dataset_name='bird', split=\"validation\", force_download=False,\n", " dataset_folder=\"/root/anindya/text2sql/data\"\n", ").setup_dataset(\n", " model_name_or_path=None, \n", " num_fewshot=3, \n", " num_rows=100,\n", " filter_by=(\"difficulty\", \"simple\")\n", ")\n", "\n", "# count the number of examples in the dataset which has \n", "# difficulty level as simple\n", "\n", "len([\n", " example for example in bird_validation \n", " if example[\"difficulty\"] == \"simple\"\n", 
"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Similarly we can also filter by the dataset by `db_id`. " ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-09-09 04:27:28,490 - [BIRD-DATASET] - INFO - Loaded Bird Dataset\n", "2024-09-09 04:27:28,491 - [BIRD-DATASET] - INFO - Setting up Bird Dataset\n", "Applying prompt: 100%|██████████| 1534/1534 [00:00<00:00, 2537.01it/s]\n", "2024-09-09 04:27:29,414 - [DATASET] - INFO - Casted dataset with model chat template\n", "2024-09-09 04:27:29,415 - [DATASET] - INFO - Starting Tokenization ...\n", "Tokenizing: 100%|██████████| 1534/1534 [00:09<00:00, 161.93it/s]\n", "Tokenizing: 100%|██████████| 1534/1534 [00:09<00:00, 161.71it/s]\n" ] }, { "data": { "text/plain": [ "{'input_ids': tensor([32013, 32013, 2042, ..., 207, 16, 32021]),\n", " 'labels': tensor([ -100, -100, -100, ..., 207, 16, 32021]),\n", " 'raw': {'question_id': 0,\n", " 'db_id': 'california_schools',\n", " 'question': 'What is the highest eligible free rate for K-12 students in the schools in Alameda County?',\n", " 'evidence': 'Eligible free rate for K-12 = `Free Meal Count (K-12)` / `Enrollment (K-12)`',\n", " 'SQL': \"SELECT `Free Meal Count (K-12)` / `Enrollment (K-12)` FROM frpm WHERE `County Name` = 'Alameda' ORDER BY (CAST(`Free Meal Count (K-12)` AS REAL) / `Enrollment (K-12)`) DESC LIMIT 1\",\n", " 'difficulty': 'simple',\n", " 'db_path': '/root/anindya/text2sql/data/bird/validation/dev_databases/california_schools/california_schools.sqlite',\n", " 'prompt': '<|begin▁of▁sentence|>You are an AI programming assistant, utilizing the Deepseek Coder model, develo....hat is the highest eligible free rate for K-12 students in the schools in Alameda County?\\n\\n# SQL: \\n\\n'}}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bird_validation = Text2SQLDataset(\n", " dataset_name='bird', split=\"validation\", 
force_download=False,\n", " dataset_folder=\"/root/anindya/text2sql/data\"\n", ").setup_dataset(\n", " model_name_or_path=\"premai-io/prem-1B-SQL\",\n", ")\n", "print_data(bird_validation[0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "That's it, thats how easy it is to use the datasets. Similarly you can also use other available datasets" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Fetching 401 files: 100%|██████████| 401/401 [00:17<00:00, 22.75it/s]\n", "2024-09-09 04:28:35,739 - [SPIDER-DATASET] - INFO - Loaded Spider Dataset\n", "2024-09-09 04:28:35,744 - [SPIDER-DATASET] - INFO - Setting up Spider Dataset\n", "Applying prompt: 100%|██████████| 3/3 [00:00<00:00, 1572.67it/s]\n", "2024-09-09 04:28:36,088 - [DATASET] - INFO - Casted dataset with model chat template\n", "2024-09-09 04:28:36,089 - [DATASET] - INFO - Starting Tokenization ...\n", "Tokenizing: 100%|██████████| 3/3 [00:00<00:00, 248.45it/s]\n", "Tokenizing: 100%|██████████| 3/3 [00:00<00:00, 276.99it/s]\n" ] } ], "source": [ "# Loading Spider Dataset\n", "\n", "spider_dataset = Text2SQLDataset(\n", " dataset_name=\"spider\",\n", " split=\"train\",\n", " dataset_folder=\"../data\",\n", ").setup_dataset(\n", " num_fewshot=3,\n", " num_rows=3,\n", " model_name_or_path=\"premai-io/prem-1B-SQL\",\n", ")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Fetching 6 files: 100%|██████████| 6/6 [00:05<00:00, 1.03it/s]\n", "2024-09-09 04:38:55,864 - [DOMAINS-DATASET] - INFO - Loaded Domains Dataset\n", "2024-09-09 04:38:55,867 - [DOMAINS-DATASET] - INFO - Setting up Domains Dataset\n", "Applying prompt: 100%|██████████| 3/3 [00:00<00:00, 1377.59it/s]\n", "2024-09-09 04:38:56,437 - [DATASET] - INFO - Casted dataset with model chat template\n", "2024-09-09 04:38:56,438 - [DATASET] - INFO - Starting Tokenization ...\n", 
"Tokenizing: 100%|██████████| 3/3 [00:00<00:00, 145.70it/s]\n", "Tokenizing: 100%|██████████| 3/3 [00:00<00:00, 160.63it/s]\n" ] } ], "source": [ "## Loading Domains dataset\n", "\n", "domains = Text2SQLDataset(\n", " dataset_name=\"domains\",\n", " split=\"train\",\n", " dataset_folder=\"../data\",\n", ").setup_dataset(\n", " num_fewshot=3,\n", " num_rows=3,\n", " model_name_or_path=\"premai-io/prem-1B-SQL\",\n", ")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-09-09 04:38:45,602 - [UTILS] - INFO - Saved JSON in: ../data/gretel/train.json\n", "Applying prompt: 100%|██████████| 3/3 [00:00<00:00, 1909.97it/s]\n", "2024-09-09 04:38:46,543 - [DATASET] - INFO - Casted dataset with model chat template\n", "2024-09-09 04:38:46,543 - [DATASET] - INFO - Starting Tokenization ...\n", "Tokenizing: 100%|██████████| 3/3 [00:00<00:00, 326.80it/s]\n", "Tokenizing: 100%|██████████| 3/3 [00:00<00:00, 400.61it/s]\n" ] } ], "source": [ "# Loading Gretel AI Dataset (This is a synthetic dataset)\n", "\n", "gretel_dataset = Text2SQLDataset(\n", " dataset_name=\"gretel\",\n", " split=\"train\",\n", " dataset_folder=\"../data\",\n", ").setup_dataset(\n", " num_fewshot=3,\n", " num_rows=3,\n", " model_name_or_path=\"premai-io/prem-1B-SQL\",\n", ")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'id': 5097,\n", " 'question': 'What is the total volume of timber sold by each salesperson, sorted by salesperson?',\n", " 'schema': \"CREATE TABLE salesperson (salesperson_id INT, name TEXT, region TEXT); INSERT INTO salesperson (salesperson_id, name, region) VALUES (1, 'John Doe', 'North'), (2, 'Jane Smith', 'South'); CREATE TABLE timber_sales (sales_id INT, salesperson_id INT, volume REAL, sale_date DATE); INSERT INTO timber_sales (sales_id, salesperson_id, volume, sale_date) VALUES (1, 1, 120, '2021-01-01'), (2, 1, 150, '2021-02-01'), (3, 
2, 180, '2021-01-01');\",\n", " 'SQL': 'SELECT salesperson_id, name, SUM(volume) as total_volume FROM timber_sales JOIN salesperson ON timber_sales.salesperson_id = salesperson.salesperson_id GROUP BY salesperson_id, name ORDER BY total_volume DESC;',\n", " 'context': \"CREATE TABLE salesperson (salesperson_id INT, name TEXT, region TEXT); INSERT INTO salesperson (salesperson_id, name, region) VALUES (1, 'John Doe', 'North'), (2, 'Jane Smith', 'South'); CREATE TABLE timber_sales (sales_id INT, salesperson_id INT, volume REAL, sale_date DATE); INSERT INTO timber_sales (sales_id, salesperson_id, volume, sale_date) VALUES (1, 1, 120, '2021-01-01'), (2, 1, 150, '2021-02-01'), (3, 2, 180, '2021-01-01');\",\n", " 'task_type': 'analytics and reporting',\n", " 'complexity': 'single join',\n", " 'db_id': 'forestry',\n", " 'db_path': None,\n", " 'prompt': '<|begin▁of▁sentence|>You are an AI programming assistant, utilizing the Deepseek Coder model, develo....tion: What is the total volume of timber sold by each salesperson, sorted by salesperson?\\n\\n# SQL: \\n\\n'}" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print_data(gretel_dataset[0][\"raw\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "One of the best things of the premsql datasets is that it supports packing. This means you can pack multiple datasets together and use them as a single dataset. This is very useful when you want to train on multiple datasets." 
] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Length of bird dataset: 3\n", "Length of spider dataset: 3\n", "Length of domains dataset: 3\n", "Length of gretel dataset: 3\n", "Length of merged dataset: 12\n" ] } ], "source": [ "# Merge all the datasets\n", "\n", "print(f\"Length of bird dataset: {len(bird_dataset)}\")\n", "print(f\"Length of spider dataset: {len(spider_dataset)}\")\n", "print(f\"Length of domains dataset: {len(domains)}\")\n", "print(f\"Length of gretel dataset: {len(gretel_dataset)}\")\n", "\n", "merged_dataset = [*bird_dataset, *spider_dataset, *domains, *gretel_dataset]\n", "print(f\"Length of merged dataset: {len(merged_dataset)}\")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'input_ids': tensor([32013, 32013, 2042, ..., 207, 16, 32021]),\n", " 'labels': tensor([ -100, -100, -100, ..., 207, 16, 32021]),\n", " 'raw': {'db_id': 'movie_platform',\n", " 'question': 'Name movie titles released in year 1945. Sort the listing by the descending order of movie popularity.',\n", " 'evidence': 'released in the year 1945 refers to movie_release_year = 1945;',\n", " 'SQL': 'SELECT movie_title FROM movies WHERE movie_release_year = 1945 ORDER BY movie_popularity DESC LIMIT 1',\n", " 'db_path': '/root/anindya/text2sql/data/bird/train/train_databases/movie_platform/movie_platform.sqlite',\n", " 'prompt': '<|begin▁of▁sentence|>You are an AI programming assistant, utilizing the Deepseek Coder model, develo....tles released in year 1945. 
Sort the listing by the descending order of movie popularity.\\n\\n# SQL: \\n\\n'}}" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print_data(merged_dataset[0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### How does a prompt looks like in premsql\n", "\n", "You might wonder how does a prompt looks like in premsql. This is how a single prompt looks like when wrapped around a model's prompt template. " ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<|begin▁of▁sentence|>You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\n", "### Instruction:\n", "\n", "# Follow these instruction:\n", "You will be given schemas of tables of a database. Your job is to write correct\n", "error free SQL query based on the question asked. Please make sure:\n", "\n", "1. Do not add ``` at start / end of the query. It should be a single line query in a single line (string format)\n", "2. Make sure the column names are correct and exists in the table\n", "3. For column names which has a space with it, make sure you have put `` in that column name\n", "4. Think step by step and always check schema and question and the column names before writing the\n", "query. 
\n", "\n", "# Database and Table Schema:\n", "CREATE TABLE salesperson (salesperson_id INT, name TEXT, region TEXT); INSERT INTO salesperson (salesperson_id, name, region) VALUES (1, 'John Doe', 'North'), (2, 'Jane Smith', 'South'); CREATE TABLE timber_sales (sales_id INT, salesperson_id INT, volume REAL, sale_date DATE); INSERT INTO timber_sales (sales_id, salesperson_id, volume, sale_date) VALUES (1, 1, 120, '2021-01-01'), (2, 1, 150, '2021-02-01'), (3, 2, 180, '2021-01-01');\n", "\n", "\n", "\n", "# Here are some Examples on how to generate SQL statements and use column names:\n", "\n", "Question: What is the total volume of timber sold by each salesperson, sorted by salesperson?\n", "SQL: SELECT salesperson_id, name, SUM(volume) as total_volume FROM timber_sales JOIN salesperson ON timber_sales.salesperson_id = salesperson.salesperson_id GROUP BY salesperson_id, name ORDER BY total_volume DESC;\n", "\n", "\n", "# Question: What is the total volume of timber sold by each salesperson, sorted by salesperson?\n", "\n", "# SQL: \n", "\n", "\n" ] } ], "source": [ "print(gretel_dataset[0][\"raw\"][\"prompt\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Creating your own dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In this section, we are going to see how we can make our own dataset similar like the above. Creating your own dataset could come with several customization and variables. 
One of the easiest ways to create your own dataset is to simply annotate the dataset in the given file structure:\n", "\n", "```\n", "├── databases\n", "│ ├── california_schools\n", "│ ├── california_schools.sqlite\n", "│ ├── card_games\n", "│ ├── codebase_community\n", "│ ├── debit_card_specializing\n", "│ ├── european_football_2\n", "│ ├── financial\n", "│ ├── formula_1\n", "│ ├── student_club\n", "│ ├── superhero\n", "│ ├── thrombosis_prediction\n", "│ └── toxicology\n", "├── train.json \n", "├── validation.json # Optional \n", "```\n", "\n", "The reason we do this hierchy is, in a real world scenerio, we can have \n", "multiple databases, and each databases could be multiple tables. So this is how we organize them.\n", "\n", "Suppose you are saving everything inside `./data` folder then inside that folder you should have a `databases` folder (you can name it something else too) and a `train/validation.json` file. \n", "\n", "Inside the databases folder you should have multple sub folders where under each sub-folder you should have a `.sqlite` file of the same name. For example: if the db name is `california_schools` then you should have a .sqlite file inside `california_schools` folder. \n", "\n", "The `train` or `validation` JSON file, should be a list of dictionaries, having the following (required) keys:\n", "\n", "1. `db_id`: this represent the folder and the `.sqlite` file name.\n", "2. `question`: this represent the question asked by the user.\n", "3. `SQL`: This is the ground truth SQL.\n", "\n", "**Please note:** All the keys are case sensitive. Here is an example of a single datapoint. 
\n", "\n", "```json\n", "\"question_id\": 0,\n", "\"db_id\": \"california_schools\",\n", "\"question\": \"What is the highest eligible free rate for K-12 students in the schools in Alameda County?\",\n", "\"evidence\": \"Eligible free rate for K-12 = `Free Meal Count (K-12)` / `Enrollment (K-12)`\",\n", "\"SQL\": \"SELECT `Free Meal Count (K-12)` / `Enrollment (K-12)` FROM frpm WHERE `County Name` = 'Alameda' ORDER BY (CAST(`Free Meal Count (K-12)` AS REAL) / `Enrollment (K-12)`) DESC LIMIT 1\",\n", "\"difficulty\": \"simple\"\n", "```\n", "\n", "You can also keep other keys too, those will be automatically used as filter keys. Now you can use the code to automatically load your dataset from the folder. " ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "from premsql.datasets import StandardDataset\n", "\n", "path = \"../data/bird/validation\"\n", "dataset = StandardDataset(\n", " split=\"validation\",\n", " dataset_path=path,\n", " database_folder_name=\"dev_databases\",\n", " json_file_name=\"validation.json\",\n", ")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\u001b[1m[\u001b[0m\u001b[32m'db_id'\u001b[0m, \u001b[32m'difficulty'\u001b[0m\u001b[1m]\u001b[0m"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset.filter_availables"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We have loaded our Bird dev database but this time we have used the `StandardDataset` class. A `StandardDataset` class acts like a template for all text2sql compatible datasets when following the above structure. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Towards more customization"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Last but not the least, there is one more level of customization that you can do while creating text-to-sql datasets. Till now all of these use cases shown above were tightly coupled with `.sqlite` specific databases. However if you:\n",
    "\n",
    "1. have different databases (like postgres or any cloud DB instance)\n",
    "2. or want to have lot of custom logics, before making prompts\n",
    "3. or add more utility on top of premsql\n",
    "\n",
    "This section will help you to achieve that. \n",
    "\n",
    "**Note** In the case of point number one, you can also migrate a subset of the dataset to SQLite. Once you have migrated a subset of your database content to SQLite and have done annotations for that, you can then go for the first route to create a Text2SQL compatible dataset for fine-tuning and inference. \n",
    "\n",
    "If you still want to go for full customization then you can achieve this in a few steps. A detailed tutorial on this will be coming in future versions. However, in short, you need to define two things for making a fully custom premsql dataset.\n",
    "\n",
    "**DatasetInstance:** A dataset instance helps to perform operations on individual datapoints. You need to extend the `premsql.datasets.base.Text2SQLBaseInstance` class to define your own. Here is how a blueprint looks:\n",
    "\n",
    "```python\n",
    "\n",
    "class CustomDataInstance(Text2SQLBaseInstance):\n",
    "    def __init__(self, dataset: list[dict]) -> None:\n",
    "        super().__init__(dataset=dataset)\n",
    "\n",
    "    def schema_prompt(self, db_path: str) -> str:\n",
    "        # write your schema prompt here\n",
    "        # you need to fetch the schema from your database\n",
    "        # and format it. For sqlite database it would look\n",
    "        # like this: SELECT sql FROM sqlite_master WHERE type='table' AND name='{table_name}\n",
    "        # check out Text2SQLBaseInstance premsql/datasets/base for more details\n",
    "```\n",
    "\n",
    "Additionally, this class has some more methods: `additional_prompt` and `apply_prompt`. Those have a DB-agnostic default implementation; however, you can change those too if you want. \n",
    "\n",
    "Once you have your instance defined, you can now define your custom class by inheriting from\n",
    "`premsql.datasets.base.Text2SQLBaseDataset` class, like this:\n",
    "\n",
    "\n",
    "```python\n",
    "class CustomText2SQLDataset(Text2SQLBaseDataset):\n",
    "    def __init__(\n",
    "        self,\n",
    "        split: str,\n",
    "        dataset_folder: Optional[Union[str, Path]] = \"./data\",\n",
    "        hf_token: Optional[str] = None,\n",
    "        force_download: Optional[bool] = False,\n",
    "    ):\n",
    "        # Define your logic here\n",
    "        pass \n",
    "\n",
    "    def setup_dataset(\n",
    "        self,\n",
    "        filter_by: tuple | None = None,\n",
    "        num_rows: int | None = None,\n",
    "        num_fewshot: int | None = None,\n",
    "        model_name_or_path: str | None = None,\n",
    "        prompt_template: str | None = None,\n",
    "    ):\n",
    "        logger.info(\"Setting up Spider Dataset\")\n",
    "        return super().setup_dataset(\n",
    "            filter_by, num_rows, num_fewshot, model_name_or_path, prompt_template\n",
    "        )\n",
    "```\n",
    "\n",
    "Based on your requirements you can define all the necessary things in the `__init__` method and the `setup_dataset` method. You can check out the `Text2SQLBaseDataset` class to see how things are defined. We will roll out a detailed tutorial on how to make a dataset for a different database very soon. "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}


================================================
FILE: examples/error_dataset.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/root/anindya/Submission/text2sql/text2sql\n"
     ]
    }
   ],
   "source": [
    "# cd text2sql"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Error Handling Datasets and Prompts\n",
    "\n",
    "In this section we are going to discuss how you can create an error handling prompt which you can pass to the models during inference for self-correction from errors, or make error handling prompts to fine-tune your models further to make them learn how to handle errors. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-09-09 13:55:27,850] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/envs/deep/compiler_compat/ld: cannot find -laio: No such file or directory\n",
      "collect2: error: ld returned 1 exit status\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[93m [WARNING] \u001b[0m async_io requires the dev libaio .so object and headers but these were not found.\n",
      "\u001b[93m [WARNING] \u001b[0m async_io: please install the libaio-dev package with apt\n",
      "\u001b[93m [WARNING] \u001b[0m If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found.\n",
      "\u001b[93m [WARNING] \u001b[0m Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH\n",
      "\u001b[93m [WARNING] \u001b[0m sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.4\n",
      "\u001b[93m [WARNING] \u001b[0m using untested triton version (3.0.0), only 1.0.0 is known to be compatible\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/envs/deep/lib/python3.10/site-packages/deepspeed/runtime/zero/linear.py:49: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.\n",
      "  def forward(ctx, input, weight, bias=None):\n",
      "/root/miniconda3/envs/deep/lib/python3.10/site-packages/deepspeed/runtime/zero/linear.py:67: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.\n",
      "  def backward(ctx, grad_output):\n"
     ]
    }
   ],
   "source": [
    "from premsql.datasets.error_dataset import ErrorDatasetGenerator\n",
    "from premsql.generators.huggingface import Text2SQLGeneratorHF\n",
    "from premsql.executors.from_langchain import ExecutorUsingLangChain"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In order to make an error handling dataset or error handling prompt, make sure the data entity has: `db_id`, `db_path` and an existing `prompt` which was used earlier to generate results from the model. Let's see an example to understand this better. We will be using our standard BirdBench dataset for this. We also define our generators; in this case it will be the [Prem-1B-SQL](https://huggingface.co/premai-io/prem-1B-SQL) model and a DB executor from langchain. \n",
    "\n",
    "If you aren't aware of generators, executors and datasets, then you can check out the following:\n",
    "\n",
    "1. [Datasets tutorial](/examples/datasets.ipynb)\n",
    "2. [Generators tutorial](/examples/generators.ipynb)\n",
    "3. [Executors and evaluators tutorial](/examples/evaluation.ipynb)\n",
    "\n",
    "Since we are making an error dataset, we will be using existing datasets, because our goal is to transform the existing train datasets into error handling datasets. \n",
    "\n",
    "The flow is simple:\n",
    "\n",
    "### For training\n",
    "\n",
    "1. Start with an existing dataset which is compatible with premsql datasets. \n",
    "2. Then use a generator to run on that dataset. The executor will gather errors for incorrect generations. \n",
    "3. Now use the existing response, initial prompt and the error to create the new data points which will be now using a error handling prompt. \n",
    "\n",
    "### For Inference\n",
    "\n",
    "premsql already handles automatic error handling in the [simple-pipeline](/premsql/pipelines/simple.py) and [execution guided decoding](/examples/generators.ipynb) sections, so you do not need to worry about that. \n",
    "\n",
    "\n",
    "Now let's start with defining our generator and executor first. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-09-09 13:55:49,797 - [GENERATOR] - INFO - Experiment folder found in: experiments/train/testing_error_gen\n",
      "Unrecognized keys in `rope_scaling` for 'rope_type'='linear': {'type'}\n",
      "Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.03s/it]\n"
     ]
    }
   ],
   "source": [
    "generator = Text2SQLGeneratorHF(\n",
    "    model_or_name_or_path=\"premai-io/prem-1B-SQL\",\n",
    "    experiment_name=\"testing_error_gen\",\n",
    "    type=\"train\", # do not type: 'test' since this will be used during training\n",
    "    device=\"cuda:0\"\n",
    ")\n",
    "\n",
    "executor = ExecutorUsingLangChain()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "After this we define our existing training dataset. We are using the BirdBench dataset but you can also use your own text2sql compatible datasets or any of our existing datasets. For demo purposes, we have set `num_rows` to 10, but in an actual scenario you should be using the full length of the training dataset. Generally your error dataset will be smaller than the training dataset if you are using a decently trained model which can generate SQL."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-09-09 14:02:05,011 - [BIRD-DATASET] - INFO - Loaded Bird Dataset\n",
      "2024-09-09 14:02:05,012 - [BIRD-DATASET] - INFO - Setting up Bird Dataset\n",
      "Applying prompt: 100%|██████████| 10/10 [00:00<00:00, 2779.53it/s]\n"
     ]
    }
   ],
   "source": [
    "from premsql.datasets import BirdDataset\n",
    "\n",
    "bird_train = BirdDataset(\n",
    "    split=\"train\",\n",
    "    dataset_folder=\"/root/anindya/text2sql/data\"\n",
    ").setup_dataset(\n",
    "    num_rows=10,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we define our error handling dataset. It is simple, all you need is to feed in the generator of your choice and the executor. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "error_dataset_gen = ErrorDatasetGenerator(\n",
    "    generator=generator,\n",
    "    executor=executor\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we generate and save the results. You can use `force` if you want to force the generation once more. Once the error prompt creations are done, it will save the dataset inside `./experiments/train/<experiment_name>/error_dataset.json`. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Generating result ...:   0%|          | 0/10 [00:00You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n### Instruction:\\n\\n# Follow these instruction:\\nYou will be given schemas of tables of a database. Your job is to write correct\\nerror free SQL query based on the question asked. Please make sure:\\n\\n1. Do not add ``` at start / end of the query. It should be a single line query in a  single line (string format)\\n2. Make sure the column names are correct and exists in the table\\n3. For column names which has a space with it, make sure you have put `` in that column name\\n4. Think step by step and always check schema and question and the column names before writing the\\nquery. \\n\\n# Database and Table Schema:\\nCREATE TABLE \"lists\"\\n(\\n    user_id                     INTEGER\\n        references lists_users (user_id),\\n    list_id                     INTEGER not null\\n        primary key,\\n    list_title                  TEXT,\\n    list_movie_number           INTEGER,\\n    list_update_timestamp_utc   TEXT,\\n    list_creation_timestamp_utc TEXT,\\n    list_followers              INTEGER,\\n    list_url                    TEXT,\\n    list_comments               INTEGER,\\n    list_description            TEXT,\\n    list_cover_image_url        TEXT,\\n    list_first_image_url        TEXT,\\n    list_second_image_url       TEXT,\\n    list_third_image_url        TEXT\\n)\\nCREATE TABLE \"movies\"\\n(\\n    movie_id             INTEGER not null\\n        primary key,\\n    movie_title          TEXT,\\n    movie_release_year   INTEGER,\\n    movie_url            TEXT,\\n    movie_title_language TEXT,\\n    movie_popularity     INTEGER,\\n    movie_image_url      TEXT,\\n    director_id          
TEXT,\\n    director_name        TEXT,\\n    director_url         TEXT\\n)\\nCREATE TABLE \"ratings_users\"\\n(\\n    user_id                 INTEGER\\n        references lists_users (user_id),\\n    rating_date_utc         TEXT,\\n    user_trialist           INTEGER,\\n    user_subscriber         INTEGER,\\n    user_avatar_image_url   TEXT,\\n    user_cover_image_url    TEXT,\\n    user_eligible_for_trial INTEGER,\\n    user_has_payment_method INTEGER\\n)\\nCREATE TABLE lists_users\\n(\\n    user_id                 INTEGER not null ,\\n    list_id                 INTEGER not null ,\\n    list_update_date_utc    TEXT,\\n    list_creation_date_utc  TEXT,\\n    user_trialist           INTEGER,\\n    user_subscriber         INTEGER,\\n    user_avatar_image_url   TEXT,\\n    user_cover_image_url    TEXT,\\n    user_eligible_for_trial TEXT,\\n    user_has_payment_method TEXT,\\n    primary key (user_id, list_id),\\n    foreign key (list_id) references lists(list_id),\\n    foreign key (user_id) references lists(user_id)\\n)\\nCREATE TABLE ratings\\n(\\n    movie_id                INTEGER,\\n    rating_id               INTEGER,\\n    rating_url              TEXT,\\n    rating_score            INTEGER,\\n    rating_timestamp_utc    TEXT,\\n    critic                  TEXT,\\n    critic_likes            INTEGER,\\n    critic_comments         INTEGER,\\n    user_id                 INTEGER,\\n    user_trialist           INTEGER,\\n    user_subscriber         INTEGER,\\n    user_eligible_for_trial INTEGER,\\n    user_has_payment_method INTEGER,\\n    foreign key (movie_id) references movies(movie_id),\\n    foreign key (user_id) references lists_users(user_id),\\n    foreign key (rating_id) references ratings(rating_id),\\n    foreign key (user_id) references ratings_users(user_id)\\n)\\n\\n\\n\\n# Here are some Examples on how to generate SQL statements and use column names:\\n\\n\\n# Question: Name movie titles released in year 1945. 
Sort the listing by the descending order of movie popularity.\\n\\n# Generated SQL: SELECT movie_title FROM movies WHERE movie_release_year = 1945 ORDER BY movie_popularity DESC LIMIT 1;\\n\\n## Error Message\\n\\nNone\\n\\nCarefully review the original question and error message, then rewrite the SQL query to address the identified issues. \\nEnsure your corrected query uses correct column names, \\nfollows proper SQL syntax, and accurately answers the original question \\nwithout introducing new errors.\\n\\n# SQL: \\n\\n',\n",
       "  'db_path': '/root/anindya/text2sql/data/bird/train/train_databases/movie_platform/movie_platform.sqlite'}}"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Even tokenize this\n",
    "\n",
    "existing_error_dataset = ErrorDatasetGenerator.from_existing(\n",
    "    experiment_name=\"testing_error_gen\",\n",
    "    tokenize_model_name_or_path=\"premai-io/prem-1B-SQL\",\n",
    ")\n",
    "\n",
    "existing_error_dataset[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Another example using sqlite executor\n",
    "\n",
    "This is another example which uses the sqlite executor to do the same thing as done above. This shows how easy it is to plug and play the components and customize them accordingly. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-09-09 14:06:12,390 - [GENERATOR] - INFO - Experiment folder found in: experiments/train/testing_error_sqlite\n",
      "Unrecognized keys in `rope_scaling` for 'rope_type'='linear': {'type'}\n",
      "Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.95s/it]\n"
     ]
    }
   ],
   "source": [
    "from premsql.executors import SQLiteExecutor\n",
    "\n",
    "generator = Text2SQLGeneratorHF(\n",
    "    model_or_name_or_path=\"premai-io/prem-1B-SQL\",\n",
    "    experiment_name=\"testing_error_sqlite\",\n",
    "    type=\"train\",\n",
    "    device=\"cuda:0\"\n",
    ")\n",
    "sqlite_executor = SQLiteExecutor()\n",
    "\n",
    "error_dataset_gen = ErrorDatasetGenerator(\n",
    "    generator=generator,\n",
    "    executor=sqlite_executor\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can also generate a tokenized dataset on the fly. Here is how you do that. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Generating result ...:   0%|          | 0/10 [00:00You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n### Instruction:\\n\\n# Follow these instruction:\\nYou will be given schemas of tables of a database. Your job is to write correct\\nerror free SQL query based on the question asked. Please make sure:\\n\\n1. Do not add ``` at start / end of the query. It should be a single line query in a  single line (string format)\\n2. Make sure the column names are correct and exists in the table\\n3. For column names which has a space with it, make sure you have put `` in that column name\\n4. Think step by step and always check schema and question and the column names before writing the\\nquery. \\n\\n# Database and Table Schema:\\nCREATE TABLE \"lists\"\\n(\\n    user_id                     INTEGER\\n        references lists_users (user_id),\\n    list_id                     INTEGER not null\\n        primary key,\\n    list_title                  TEXT,\\n    list_movie_number           INTEGER,\\n    list_update_timestamp_utc   TEXT,\\n    list_creation_timestamp_utc TEXT,\\n    list_followers              INTEGER,\\n    list_url                    TEXT,\\n    list_comments               INTEGER,\\n    list_description            TEXT,\\n    list_cover_image_url        TEXT,\\n    list_first_image_url        TEXT,\\n    list_second_image_url       TEXT,\\n    list_third_image_url        TEXT\\n)\\nCREATE TABLE \"movies\"\\n(\\n    movie_id             INTEGER not null\\n        primary key,\\n    movie_title          TEXT,\\n    movie_release_year   INTEGER,\\n    movie_url            TEXT,\\n    movie_title_language TEXT,\\n    movie_popularity     INTEGER,\\n    movie_image_url      TEXT,\\n    director_id          
TEXT,\\n    director_name        TEXT,\\n    director_url         TEXT\\n)\\nCREATE TABLE \"ratings_users\"\\n(\\n    user_id                 INTEGER\\n        references lists_users (user_id),\\n    rating_date_utc         TEXT,\\n    user_trialist           INTEGER,\\n    user_subscriber         INTEGER,\\n    user_avatar_image_url   TEXT,\\n    user_cover_image_url    TEXT,\\n    user_eligible_for_trial INTEGER,\\n    user_has_payment_method INTEGER\\n)\\nCREATE TABLE lists_users\\n(\\n    user_id                 INTEGER not null ,\\n    list_id                 INTEGER not null ,\\n    list_update_date_utc    TEXT,\\n    list_creation_date_utc  TEXT,\\n    user_trialist           INTEGER,\\n    user_subscriber         INTEGER,\\n    user_avatar_image_url   TEXT,\\n    user_cover_image_url    TEXT,\\n    user_eligible_for_trial TEXT,\\n    user_has_payment_method TEXT,\\n    primary key (user_id, list_id),\\n    foreign key (list_id) references lists(list_id),\\n    foreign key (user_id) references lists(user_id)\\n)\\nCREATE TABLE ratings\\n(\\n    movie_id                INTEGER,\\n    rating_id               INTEGER,\\n    rating_url              TEXT,\\n    rating_score            INTEGER,\\n    rating_timestamp_utc    TEXT,\\n    critic                  TEXT,\\n    critic_likes            INTEGER,\\n    critic_comments         INTEGER,\\n    user_id                 INTEGER,\\n    user_trialist           INTEGER,\\n    user_subscriber         INTEGER,\\n    user_eligible_for_trial INTEGER,\\n    user_has_payment_method INTEGER,\\n    foreign key (movie_id) references movies(movie_id),\\n    foreign key (user_id) references lists_users(user_id),\\n    foreign key (rating_id) references ratings(rating_id),\\n    foreign key (user_id) references ratings_users(user_id)\\n)\\n\\n\\n\\n# Here are some Examples on how to generate SQL statements and use column names:\\n\\n\\n# Question: Name movie titles released in year 1945. 
Sort the listing by the descending order of movie popularity.\\n\\n# Generated SQL: SELECT movie_title FROM movies WHERE movie_release_year = 1945 ORDER BY movie_popularity DESC LIMIT 1;\\n\\n## Error Message\\n\\nNone\\n\\nCarefully review the original question and error message, then rewrite the SQL query to address the identified issues. \\nEnsure your corrected query uses correct column names, \\nfollows proper SQL syntax, and accurately answers the original question \\nwithout introducing new errors.\\n\\n# SQL: \\n\\n',\n",
       "  'db_path': '/root/anindya/text2sql/data/bird/train/train_databases/movie_platform/movie_platform.sqlite'}}"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "error_dataset_from_sqlite[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "That's it, that is how you generate an error handling dataset. This dataset will be compatible with other premsql datasets, so you can use / mix all of them as a singular dataset entity which can now be used collectively for fine-tuning purposes. "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "deep",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/evaluation.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "30e64251-c3f2-473b-a76f-10bc4a645e93",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/root/anindya/Submission/text2sql/text2sql\n"
     ]
    }
   ],
   "source": [
    "# cd .."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "52735847-f54b-4929-acfb-f4fafb509df7",
   "metadata": {},
   "source": [
    "## Evaluator\n",
    "\n",
    "premsql evaluators help you to evaluate your text-to-sql models on your various validation datasets. Currently we support two metrics for evaluation:\n",
    "\n",
    "1. Execution Accuracy\n",
    "2. Valid Efficiency Score\n",
    "\n",
    "**Execution Accuracy (EX):** From the name, it is clear that the correctness of the LLM is measured by comparing the executed results from the LLM with the ground truth.\n",
    "    \n",
    "**Valid Efficiency Score (VES):** The primary objective of LLM-generated SQL queries is to be accurate. However, it also needs to be performance-optimized when dealing with big data. This metric assesses both of the objectives. It quantifies how efficient the query is and whether the query is accurate or not. The figure below shows how it is computed. \n",
    "\n",
    "Now let's jump in to the code to see how we can use premsql to evaluate models or pipelines using these metrics. \n",
    "\n",
    "To start off, we import all the necessary things required to evaluate our models. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b2516212-a777-4ce4-87fa-781304818819",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/envs/deep/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-09-09 12:58:58,000] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/envs/deep/compiler_compat/ld: cannot find -laio: No such file or directory\n",
      "collect2: error: ld returned 1 exit status\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[93m [WARNING] \u001b[0m async_io requires the dev libaio .so object and headers but these were not found.\n",
      "\u001b[93m [WARNING] \u001b[0m async_io: please install the libaio-dev package with apt\n",
      "\u001b[93m [WARNING] \u001b[0m If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found.\n",
      "\u001b[93m [WARNING] \u001b[0m Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH\n",
      "\u001b[93m [WARNING] \u001b[0m sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.4\n",
      "\u001b[93m [WARNING] \u001b[0m using untested triton version (3.0.0), only 1.0.0 is known to be compatible\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/envs/deep/lib/python3.10/site-packages/deepspeed/runtime/zero/linear.py:49: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.\n",
      "  def forward(ctx, input, weight, bias=None):\n",
      "/root/miniconda3/envs/deep/lib/python3.10/site-packages/deepspeed/runtime/zero/linear.py:67: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.\n",
      "  def backward(ctx, grad_output):\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "from pathlib import Path \n",
    "from premsql.evaluator import Text2SQLEvaluator\n",
    "from premsql.executors import SQLiteExecutor"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "226827f8-b3bd-4249-9adf-3a396fa3fa37",
   "metadata": {},
   "source": [
    "Our evaluation methods are agnostic to models or any pipelines. To evaluate we rely on a special response JSON structure. So ideally we assume that, before doing evaluation, you have got all the model responses saved inside a JSON. \n",
    "\n",
    "In our [premsql.generators](/examples/generators.ipynb) section, we have shown you how you can get the model responses for validation or inference purposes. We start off by defining our experiment_path. You can get the experiment path manually or you can also get it from your generator object (more on that below). "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "7ca7d114",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'result': [('Brief Encounter',)], 'error': None, 'execution_time': 0.03717160224914551}\n"
     ]
    }
   ],
   "source": [
    "executor = SQLiteExecutor()\n",
    "db_path = (\n",
    "    '/root/anindya/text2sql/data/bird/train/train_databases/movie_platform/movie_platform.sqlite'\n",
    ")\n",
    "sql = 'SELECT movie_title FROM movies WHERE movie_release_year = 1945 ORDER BY movie_popularity DESC LIMIT 1'\n",
    "\n",
    "result = executor.execute_sql(\n",
    "    sql=sql,\n",
    "    dsn_or_db_path=db_path\n",
    ")\n",
    "\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "790a8667",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'result': \"[('Brief Encounter',)]\",\n",
       " 'error': None,\n",
       " 'execution_time': 0.028678178787231445}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from premsql.executors import ExecutorUsingLangChain\n",
    "\n",
    "executor = ExecutorUsingLangChain()\n",
    "executor.execute_sql(\n",
    "    sql=sql,\n",
    "    dsn_or_db_path=db_path\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "05c40a8c-8972-4992-b035-5ca7ed410211",
   "metadata": {},
   "outputs": [],
   "source": [
    "experiment_path = Path(\n",
    "    \"experiments/test/testing_finetuned_deepseek_full_fewshot/\"\n",
    ")\n",
    "responses = json.load(open(experiment_path / \"predict.json\", \"r\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3bbeee2b-9cc6-4c22-929e-1927d0ea9ad0",
   "metadata": {},
   "source": [
    "Since text-to-SQL is a database dependent task, it requires a DB source to execute the SQL generated from the model and compare it with the result executed from the ground truth SQL. \n",
    "\n",
    "So evaluator depends on an executor object. An executor derives from `premsql.evaluator.base.BaseExecutor` abstract class. This class has one method called `execute_sql`. We are going to use `SQLiteExecutor` to execute in SQLite DBs. You can also make your own executor to evaluate with your custom executor. More on that below. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5c313d53-da57-4687-9a54-0b03bec9f3d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "executor = SQLiteExecutor()\n",
    "evaluator = Text2SQLEvaluator(\n",
    "    executor=executor, experiment_path=experiment_path\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0b345933-ff9d-4a07-81c0-43e53279bd0e",
   "metadata": {},
   "source": [
    "Now our setup is done, let's compute the execution accuracy score using premsql evaluator. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a146e8d7-ed2a-444c-a59f-6390a6b1f136",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fb8f8c7ccd854d7c918098ad214250fd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/1534 [00:00 Execution Accuracy is: {'simple': 49.18918918918919, 'challenging': 24.137931034482758, 'moderate': \n",
       "38.146551724137936, 'overall': 43.481095176010434}\n",
       "\n"
      ],
      "text/plain": [
       " Execution Accuracy is: \u001b[1m{\u001b[0m\u001b[32m'simple'\u001b[0m: \u001b[1;36m49.18918918918919\u001b[0m, \u001b[32m'challenging'\u001b[0m: \u001b[1;36m24.137931034482758\u001b[0m, \u001b[32m'moderate'\u001b[0m: \n",
       "\u001b[1;36m38.146551724137936\u001b[0m, \u001b[32m'overall'\u001b[0m: \u001b[1;36m43.481095176010434\u001b[0m\u001b[1m}\u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "print(f\" Execution Accuracy is: {ex}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "68d88030-7588-478a-9178-3ebfa332ff65",
   "metadata": {},
   "source": [
     "Once you get the result, you will also see that the results are saved in the same `experiment_path` which was initially given to the object. This saves a lot of information, like the errors for each of the validation questions, among other details. This helps us understand the problems and debug them. Here is an instance of how it looks:\n",
    "\n",
    "```json\n",
    "\n",
    "{\n",
    "    \"question_id\": 23,\n",
    "    \"db_id\": \"california_schools\",\n",
    "    \"question\": \"List the names of schools with more than 30 difference in enrollements between K-12 and ages 5-17? Please also give the full street adress of the schools.\",\n",
    "    \"evidence\": \"Diffrence in enrollement = `Enrollment (K-12)` - `Enrollment (Ages 5-17)`\",\n",
    "    \"SQL\": \"SELECT T1.School, T1.Street FROM schools AS T1 INNER JOIN frpm AS T2 ON T1.CDSCode = T2.CDSCode WHERE T2.`Enrollment (K-12)` - T2.`Enrollment (Ages 5-17)` > 30\",\n",
    "    \"difficulty\": \"moderate\",\n",
    "    \"db_path\": \"data/bird/validation/dev_databases/california_schools/california_schools.sqlite\",\n",
    "    \"prompt\": \"<\\uff5cbegin\\u2581of\\u2581sentence\\uff5c>You are an ... Additional Knowledge: Diffrence in enrollement = `Enrollment (K-12)` - `Enrollment (Ages 5-17)`# Question: List the names of schools with more than 30 difference in enrollements between K-12 and ages 5-17? Please also give the full street adress of the schools.\\n\\n# SQL:\\n\",\n",
    "    \"dataset_type\": \"real\",\n",
    "    \"generated\": \"SELECT T2.`School Name`, T2.Street, T2.City, T2.Zip FROM satscores AS T1 INNER JOIN frpm AS T2 ON T1.cds = T2.CDSCode WHERE T1.`Enrollment (K-12)` - T1.`Enrollment (Ages 5-17)` > 30\",\n",
    "    \"error\": \"no such column: T2.Street\",\n",
    "    \"accuracy\": 0\n",
    "},\n",
    "```\n",
    "\n",
     "All of this information is saved in the provided `experiment_path`. Similarly, you can calculate the Valid Efficiency Score (VES) using the same method. For this one, let's filter the result based on `db_id`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "6670b8f2-2a71-42de-a0e9-ec37c55c0305",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b68615d1d2fc43de8e5a3929578fb668",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/1534 [00:00{\n",
       "    'card_games': 43.07853075375516,\n",
       "    'formula_1': 31.775724213967827,\n",
       "    'superhero': 71.8911921365164,\n",
       "    'thrombosis_prediction': 32.04531820456654,\n",
       "    'european_football_2': 55.13455641323744,\n",
       "    'debit_card_specializing': 31.443812836674972,\n",
       "    'financial': 20.009163037705697,\n",
       "    'toxicology': 38.062478162792736,\n",
       "    'california_schools': 20.28612811030907,\n",
       "    'codebase_community': 61.334184528675365,\n",
       "    'student_club': 55.627231251046005,\n",
       "    'overall': 43.69090268345865\n",
       "}\n",
       "\n"
      ],
      "text/plain": [
       "\u001b[1m{\u001b[0m\n",
       "    \u001b[32m'card_games'\u001b[0m: \u001b[1;36m43.07853075375516\u001b[0m,\n",
       "    \u001b[32m'formula_1'\u001b[0m: \u001b[1;36m31.775724213967827\u001b[0m,\n",
       "    \u001b[32m'superhero'\u001b[0m: \u001b[1;36m71.8911921365164\u001b[0m,\n",
       "    \u001b[32m'thrombosis_prediction'\u001b[0m: \u001b[1;36m32.04531820456654\u001b[0m,\n",
       "    \u001b[32m'european_football_2'\u001b[0m: \u001b[1;36m55.13455641323744\u001b[0m,\n",
       "    \u001b[32m'debit_card_specializing'\u001b[0m: \u001b[1;36m31.443812836674972\u001b[0m,\n",
       "    \u001b[32m'financial'\u001b[0m: \u001b[1;36m20.009163037705697\u001b[0m,\n",
       "    \u001b[32m'toxicology'\u001b[0m: \u001b[1;36m38.062478162792736\u001b[0m,\n",
       "    \u001b[32m'california_schools'\u001b[0m: \u001b[1;36m20.28612811030907\u001b[0m,\n",
       "    \u001b[32m'codebase_community'\u001b[0m: \u001b[1;36m61.334184528675365\u001b[0m,\n",
       "    \u001b[32m'student_club'\u001b[0m: \u001b[1;36m55.627231251046005\u001b[0m,\n",
       "    \u001b[32m'overall'\u001b[0m: \u001b[1;36m43.69090268345865\u001b[0m\n",
       "\u001b[1m}\u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "print(ves)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8dc48a3f-3c1c-4b71-a9cf-81bd0f71db58",
   "metadata": {},
   "source": [
     "Let's now plot the results to see how they are distributed across different databases. These kinds of filters let us analyse the results of the model or pipeline along several key aspects, which helps us understand where the next iterations should focus. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "ce44e273-ea0e-4d88-8614-72b71fb524c6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAJOCAYAAABm7rQwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADrTUlEQVR4nOzdd3gUZdfH8d8mkARIgYQSSkIJSAQRAUUCKKg0BRsIKrxS7IooIAjYAAvNAipNLGBDVIqIXXkUFbBQRIRHihCKNCEhBUgCyXn/4MmQJYkksOwm8fu5Lq6LnJ2dvc99z8zOnr1n1mVmJgAAAAAAAMCL/HzdAAAAAAAAAPz7UJQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAlEjx8fFyuVyaNWuWr5tyxt566y3FxsaqdOnSKl++vEfX3bdvX9WqVcstlpqaqttvv12RkZFyuVwaOHCgJGnv3r264YYbFBERIZfLpUmTJnm0LfCtkrTP/JOzkWde6xw1apRcLpfHXuPfKK8+rFWrlvr27eubBgEAPI6iFADA56655hqVLVtWKSkp+S7Tq1cvBQQE6MCBA15sme/98ccf6tu3r2JiYvTKK69oxowZ+S6b/QEu+1/ZsmUVHR2tq6++WjNnzlR6enqBXnPMmDGaNWuW7rnnHr311lu65ZZbJEmDBg3SF198oREjRuitt95Sp06dPJLj2TB16tRCFx3S0tI0ceJEXXzxxQoLC1NQUJDOOecc3Xfffdq4cWOh27Bs2TKNGjVKBw8eLPRz/y2+/fZbt2325H9z5szxdROLtY8//lidOnVSRESEsz0PGTLkX3ccBQAUXaV83QAAAHr16qVFixZpwYIF6t27d67HDx8+rIULFzofrv5Nvv32W2VlZemFF15Q3bp1C/ScadOmKTg4WOnp6frrr7/0xRdf6NZbb9WkSZP08ccfKyoqyln2lVdeUVZWltvz//Of/6hFixYaOXJkrvi1116rIUOGnHliZ9nUqVNVsWLFAs+o2L9/vzp16qSVK1eqS5cu6tmzp4KDg7VhwwbNmTNHM2bMUEZGRqHasGzZMo0ePVp9+/b1+Ay3s6FmzZo6cuSISpcu7fXXvv/++3XRRRflisfFxXm9LZ7y6KOPavjw4T57/SFDhui5555T48aNNWzYMIWHh2vVqlWaPHmy5syZo8WLF6t+/fo+ax8AABJFKQBAEXDNNdcoJCREs2fPzrMotXDhQh06dEi9evXyQet8a9++fZJUqKLGDTfcoIoVKzp/P/7443rnnXfUu3dvde/eXT/++KPzWF4FiH379qlBgwZ5xj1ZXDl27JiysrIUEBDgsXWerr59+2r16tWaO3euunXr5vbYk08+qUceecRHLTv7co5DUFCQT9pwySWX6IYbbvDJa58tpUqVUqlSvjnVfvfdd/Xcc8/pxhtv1DvvvCN/f3/nsb59++qyyy5T9+7dtWrVKq+28dChQypXrpzXXg8AUPRx+R4AwOfKlCmjrl27avHixU4RJqfZs2crJCRE11xzjRISEjRkyBA1atRIwcHBCg0N1ZVXXqk1a9ac8nXatm2rtm3b5orndV+lrKwsTZo0SQ0bNlRQUJCqVKmiu+66S4mJiW7LrVixQh07dlTFihVVpkwZ1a5dW7feemuB8p46daoaNmyowMBAVatWTf3793e71KtWrVrObKVKlSrJ5XJp1KhRBVr3yXr16qXbb79dP/30k7766qs8c8++lGrr1q365JNPnEuoZs2aJZfLJTPTlClTnHi2gwcPauDAgYqKilJgYKDq1q2r8ePHu83Ayr7nzrPPPqtJkyYpJiZGgYGBWr9+vaTjlynecMMNCg8PV1BQkC688EJ99NFHbjlkt2Pp0qUaPHiwKlWqpHLlyun666/X33//
7dZv69at05IlS5y25jXu2X766Sd98sknuu2223IVpCQpMDBQzz77rPP3b7/9pr59+6pOnToKCgpSZGSkbr31VrdLokaNGqWhQ4dKkmrXru20Iz4+3lnm7bffVrNmzVSmTBmFh4frpptu0o4dO3K9/pQpU1SnTh2VKVNGzZs31/fff5/ntrxv3z7ddtttqlKlioKCgtS4cWO98cYbbsv80zjkd6+lgozN0aNHNXr0aNWrV09BQUGKiIhQ69at3ba1MzFz5ky5XC69/vrrbvExY8bI5XLp008/dWIHDx7UoEGDVKtWLQUGBqpGjRrq3bu39u/fn+/6C3NsOHjwoPr27auwsDCVL19effr0yfMSzbzuh+RyuXTffffpww8/1HnnnafAwEA1bNhQn3/+ea7nf/vtt7rwwgsVFBSkmJgYvfzyywW+T9Xo0aNVoUIFzZgxw60gJUnNmzfXsGHDtHbtWs2dO1eSdN999yk4OFiHDx/Ota6bb75ZkZGRyszMdGKfffaZLrnkEpUrV04hISHq3Lmz1q1b5/a8vn37Kjg4WH/++aeuuuoqhYSEOF8sfP/99+revbuio6MVGBioqKgoDRo0SEeOHDllbgCAkoWZUgCAIqFXr15644039P777+u+++5z4gkJCfriiy908803q0yZMlq3bp0+/PBDde/eXbVr19bevXv18ssvq02bNlq/fr2qVavmkfbcddddmjVrlvr166f7779fW7du1eTJk7V69WotXbpUpUuX1r59+9ShQwdVqlRJw4cPV/ny5RUfH6/58+efcv2jRo3S6NGj1a5dO91zzz3asGGDpk2bpl9++cVZ/6RJk/Tmm29qwYIFziV5559//mnndMstt2jGjBn68ssv1b59+1yPn3vuuXrrrbc0aNAg1ahRQw8++KAkqUmTJs69pdq3b+82m+3w4cNq06aN/vrrL911112Kjo7WsmXLNGLECO3evTvXzdBnzpyptLQ03XnnnQoMDFR4eLjWrVunVq1aqXr16ho+fLjKlSun999/X9ddd53mzZun66+/3m0dAwYMUIUKFTRy5EjFx8dr0qRJuu+++/Tee+9JkiZNmqQBAwYoODjYmeFUpUqVfPslu8CSfe+sU/nqq6+0ZcsW9evXT5GRkVq3bp1mzJihdevW6ccff5TL5VLXrl21ceNGvfvuu5o4caIzc61SpUqSpKefflqPPfaYevToodtvv11///23XnrpJV166aVavXq1MyNt2rRpuu+++3TJJZdo0KBBio+P13XXXacKFSqoRo0aTpuOHDmitm3bavPmzbrvvvtUu3ZtffDBB+rbt68OHjyoBx544JTjcPJlnJIKPDajRo3S2LFjdfvtt6t58+ZKTk7WihUrtGrVqjy3tZOlpKTkWTTKvql+v379NH/+fA0ePFjt27dXVFSU1q5dq9GjR+u2227TVVddJen4TfovueQS/fe//9Wtt96qpk2bav/+/froo4+0c+dOtxmEp8PMdO211+qHH37Q3XffrXPPPVcLFixQnz59CryOH374QfPnz9e9996rkJAQvfjii+rWrZu2b9/uXJ68evVqderUSVWrVtXo0aOVmZmpJ554wtl+/smmTZu0YcMG9e3bV6GhoXku07t3b40cOVIff/yxbrrpJt14442aMmWKPvnkE3Xv3t1Z7vDhw1q0aJH69u3rFLfeeust9enTRx07dtT48eN1+PBhTZs2Ta1bt9bq1avdinjHjh1Tx44d1bp1az377LMqW7asJOmDDz7Q4cOHdc899ygiIkI///yzXnrpJe3cuVMffPBBgfsSAFACGAAARcCxY8esatWqFhcX5xafPn26SbIvvvjCzMzS0tIsMzPTbZmtW7daYGCgPfHEE24xSTZz5kwn1qZNG2vTpk2u1+7Tp4/VrFnT+fv77783SfbOO++4Lff555+7xRcsWGCS7JdffilUrvv27bOAgADr0KGDWy6TJ082Sfb66687sZEjR5ok+/vv
v0+53lMtm5iYaJLs+uuvd2In525mVrNmTevcuXOu50uy/v37u8WefPJJK1eunG3cuNEtPnz4cPP397ft27eb2YnxCA0NtX379rkte8UVV1ijRo0sLS3NiWVlZVnLli2tXr16TmzmzJkmydq1a2dZWVlOfNCgQebv728HDx50Yg0bNsxzrPNy/fXXmyRLTEws0PKHDx/OFXv33XdNkn333XdO7JlnnjFJtnXrVrdl4+Pjzd/f355++mm3+Nq1a61UqVJOPD093SIiIuyiiy6yo0ePOsvNmjXLJLnlN2nSJJNkb7/9thPLyMiwuLg4Cw4OtuTkZDP753HIa58p6Ng0btw4z23mVL755huTlO+/3bt3O8vu3r3bwsPDrX379paenm5NmjSx6OhoS0pKcpZ5/PHHTZLNnz8/12tlbzNncmz48MMPTZJNmDDBiR07dswuueSSXOvM3h9zkmQBAQG2efNmJ7ZmzRqTZC+99JITu/rqq61s2bL2119/ObFNmzZZqVKlcq3zZNltnDhx4j8uFxoaak2bNjWz431TvXp169atm9sy77//vtt2nZKSYuXLl7c77rjDbbk9e/ZYWFiYW7xPnz4myYYPH57rtfPah8aOHWsul8u2bdvmxPLqw5o1a1qfPn3+MTcAQPHB5XsAgCLB399fN910k5YvX+52idPs2bNVpUoVXXHFFZKOX0rl53f87SszM1MHDhxQcHCw6tevr1WrVnmkLR988IHCwsLUvn177d+/3/nXrFkzBQcH65tvvpF04j5PH3/8sY4ePVrg9X/99dfKyMjQwIEDnVwk6Y477lBoaKg++eQTj+RxsuDgYEn6x185LKwPPvhAl1xyiSpUqODWV+3atVNmZqa+++47t+W7devmNtsjISFB//nPf9SjRw9ntsz+/ft14MABdezYUZs2bdJff/3lto4777zT7RKmSy65RJmZmdq2bdtp5ZCcnCxJCgkJKdDyZcqUcf6flpam/fv3q0WLFpJUoG1w/vz5ysrKUo8ePdz6LDIyUvXq1XO2rxUrVujAgQO644473O7706tXL1WoUMFtnZ9++qkiIyN18803O7HSpUvr/vvvV2pqqpYsWeK2/MnjkJfCjE358uW1bt06bdq06ZT55+Xxxx/XV199letfeHi4s0xkZKSmTJmir776Spdccol+/fVXvf76626zgebNm6fGjRvnml0nqUCXvZ3Kp59+qlKlSumee+5xYv7+/howYECB19GuXTvFxMQ4f59//vkKDQ3Vli1bJB0/rn399de67rrr3GZ+1q1bV1deeeUp15+9f59qew4JCXG2fZfLpe7du+vTTz9Vamqqs8x7772n6tWrq3Xr1pKOzxI8ePCgbr75Zrdt19/fXxdffLGz7eaUs6+y5dyHDh06pP3796tly5YyM61evfqUOQIASg4u3wMAFBm9evXSxIkTNXv2bD388MPauXOnvv/+e91///3OpSPZv0Q3depUbd261e0+J576Zb5NmzYpKSlJlStXzvPx7PtetWnTRt26ddPo0aM1ceJEtW3bVtddd5169uypwMDAfNefXTw5+ZevAgICVKdOndMurpxK9ofNghZfCmLTpk367bff8i1wnHyPsNq1a7v9vXnzZpmZHnvsMT322GP5rqN69erO39HR0W6PZxdoTr7fV0FlFzVSUlIKdCP3hIQEjR49WnPmzMmVX1JS0imfv2nTJpmZ6tWrl+fj2Tefz94OTv7VxVKlSuW6z9G2bdtUr149tyKndPySzJzrynbyOOSlMGPzxBNP6Nprr9U555yj8847T506ddItt9xS4MtNGzVqpHbt2p1yuZtuuklvv/22PvnkE915551OsTrbn3/+med9wTxl27Ztqlq1qlPgzVaYX7E7efuVjm/D2dvvvn37dOTIkTx/bbMgv8CZvX+fqvickpLidoy78cYbNWnSJH300Ufq2bOnUlNT9emnn+quu+5yCnrZRcfLL788z3WefLlgqVKl
3C4zzbZ9+3Y9/vjj+uijj3LttwXZhwAAJQdFKQBAkdGsWTPFxsbq3Xff1cMPP6x3331XZub2q3tjxozRY489pltvvVVPPvmkwsPD5efnp4EDB+Z5T5ycsm/WfbKchS3peOGrcuXKeuedd/JcT3YBxuVyae7cufrxxx+1aNEiffHFF7r11lv13HPP6ccff8z1wdXXfv/9d0kF+2BbUFlZWWrfvr0eeuihPB8/55xz3P7OOUMi+/nS8Z+v79ixY57rOLm9J9+4OVteY1sQsbGxkqS1a9fqkksuOeXyPXr00LJlyzR06FBdcMEFCg4OVlZWljp16nTKbVA6nrPL5dJnn32WZy7e2G5OHoe8FGZsLr30Uv35559auHChvvzyS7366quaOHGipk+frttvv91j7T5w4IBWrFghSVq/fr2ysrJyFeJOR0GPDZ7g6e33ZNmFyN9++y3fZbZt26bk5GS3X9ls0aKFatWqpffff189e/bUokWLdOTIEd14443OMtnbxFtvvaXIyMhc6z35l/xyzmzNlpmZqfbt2yshIUHDhg1TbGysypUrp7/++kt9+/Yt0D4EACg5KEoBAIqUXr166bHHHtNvv/2m2bNnq169erroooucx+fOnavLLrtMr732mtvzDh48eMqbGFeoUMG5RCank2eRxMTE6Ouvv1arVq0K9OG9RYsWatGihZ5++mnNnj1bvXr10pw5c/L9MF6zZk1J0oYNG1SnTh0nnpGRoa1btxZoxsjpeOuttyQp3wLD6YiJiVFqaupptzk7/9KlS3s078JcqnX11Vdr7Nixevvtt09ZlEpMTNTixYs1evRoPf744048r8vW8mtDTEyMzEy1a9fOVbTLKXs72bx5sy677DInfuzYMcXHx7vNQqpZs6Z+++23XEWaP/74w21dhVHYsQkPD1e/fv3Ur18/paam6tJLL9WoUaM8WpTq37+/UlJSNHbsWI0YMUKTJk3S4MGDncdjYmKc4mthFPTYULNmTS1evFipqaluxcMNGzYU+jXzU7lyZQUFBWnz5s25HssrdrJzzjlH55xzjj788EO98MILec6MfPPNNyVJXbp0cYv36NFDL7zwgpKTk/Xee++pVq1azqWpkpzLDitXrnza++vatWu1ceNGvfHGG24/muCpX2oEABQv3FMKAFCkZM+Kevzxx/Xrr7+6zZKSjs8yOHlGwQcffJDrvkN5iYmJ0R9//KG///7bia1Zs0ZLly51W65Hjx7KzMzUk08+mWsdx44dc37+PTExMVdbLrjgAklSenp6vu1o166dAgIC9OKLL7o9/7XXXlNSUpI6d+58ylwKa/bs2Xr11VcVFxeX65KnM9GjRw8tX75cX3zxRa7HDh48qGPHjv3j8ytXrqy2bdvq5Zdf1u7du3M9nnOsCqNcuXLOOJ1KXFycOnXqpFdffVUffvhhrsczMjI0ZMgQSSdmuZw87if/ymB2GyTlakfXrl3l7++v0aNH51qPmenAgQOSpAsvvFARERF65ZVX3PrxnXfeyXXJ01VXXaU9e/Y4v0AoHd9WX3rpJQUHB6tNmzb/0AN5K8zYZLc5W3BwsOrWrfuP+0FhzZ07V++9957GjRun4cOH66abbtKjjz6qjRs3Ost069ZNa9as0YIFC3I9/59mIhX02HDVVVfp2LFjmjZtmhPLzMzUSy+9dCapufH391e7du304YcfateuXU588+bN+uyzzwq0jscff1yJiYm6++67c832WrlypcaPH6/zzjsv16WON954o9LT0/XGG2/o888/V48ePdwe79ixo0JDQzVmzJg876NXkP01r33IzPTCCy8UKDcAQMnCTCkAQJFSu3ZttWzZUgsXLpSkXEWpLl266IknnlC/fv3UsmVLrV27Vu+8847bjKP83HrrrXr++efVsWNH3Xbbbdq3b5+mT5+uhg0bOjf8lY7fK+quu+7S2LFj9euvv6pDhw4qXbq0Nm3apA8++EAvvPCCbrjhBr3xxhua
OnWqrr/+esXExCglJUWvvPKKQkNDnZ+oz0ulSpU0YsQIjR49Wp06ddI111yjDRs2aOrUqbrooov0f//3f6fZe8fNnTtXwcHBysjI0F9//aUvvvhCS5cuVePGjT3+c+tDhw7VRx99pC5duqhv375q1qyZDh06pLVr12ru3LmKj48/5Qy2KVOmqHXr1mrUqJHuuOMO1alTR3v37tXy5cu1c+dOrVmzptDtatasmaZNm6annnpKdevWVeXKlfO9D450fOZIhw4d1LVrV1199dW64oorVK5cOW3atElz5szR7t279eyzzyo0NFSXXnqpJkyYoKNHj6p69er68ssvtXXr1jzbIEmPPPKIbrrpJpUuXVpXX321YmJi9NRTT2nEiBGKj4/Xddddp5CQEG3dulULFizQnXfeqSFDhiggIECjRo3SgAEDdPnll6tHjx6Kj4/XrFmzFBMT4zYT684779TLL7+svn37auXKlapVq5bmzp2rpUuXatKkSad9H7GCjk2DBg3Utm1bNWvWTOHh4VqxYoXmzp2r++67r0Cv8/333ystLS1X/Pzzz9f555+vffv26Z577tFll13mrHPy5Mn65ptv1LdvX/3www/y8/PT0KFDNXfuXHXv3l233nqrmjVrpoSEBH300UeaPn26GjdunOfrF/TYcPXVV6tVq1YaPny44uPj1aBBA82fP9/j90EaNWqUvvzyS7Vq1Ur33HOPMjMzNXnyZJ133nn69ddfT/n8Xr166ZdfftELL7yg9evXOzfHX7VqlV5//XVFRERo7ty5zv3LsjVt2lR169bVI488ovT0dLdL96Tj94yaNm2abrnlFjVt2lQ33XSTKlWqpO3bt+uTTz5Rq1atNHny5H9sW2xsrGJiYjRkyBD99ddfCg0N1bx58077nnAAgGLOy7/2BwDAKU2ZMsUkWfPmzXM9lpaWZg8++KBVrVrVypQpY61atbLly5fn+kn3vH723czs7bfftjp16lhAQIBdcMEF9sUXX+T62fdsM2bMsGbNmlmZMmUsJCTEGjVqZA899JDt2rXLzMxWrVplN998s0VHR1tgYKBVrlzZunTpYitWrChQnpMnT7bY2FgrXbq0ValSxe655x5LTEx0Wyb7J9H//vvvU64ve9nsf0FBQVajRg3r0qWLvf7665aWlpbrOXnlXrNmTevcuXOuZSVZ//79c8VTUlJsxIgRVrduXQsICLCKFStay5Yt7dlnn7WMjAwzOzEezzzzTJ5t//PPP613794WGRlppUuXturVq1uXLl1s7ty5zjIzZ840SfbLL7+4Pfebb74xSfbNN984sT179ljnzp0tJCTEJLltG/k5fPiwPfvss3bRRRdZcHCwBQQEWL169WzAgAG2efNmZ7mdO3fa9ddfb+XLl7ewsDDr3r277dq1yyTZyJEj3db55JNPWvXq1c3Pz88k2datW53H5s2bZ61bt7Zy5cpZuXLlLDY21vr3728bNmxwW8eLL75oNWvWtMDAQGvevLktXbrUmjVrZp06dXJbbu/evdavXz+rWLGiBQQEWKNGjXJt//80DvntMwUZm6eeesqaN29u5cuXtzJlylhsbKw9/fTTzvjnJ3vs8vuX3Z9du3a1kJAQi4+Pd3v+woULTZKNHz/eiR04cMDuu+8+q169ugUEBFiNGjWsT58+tn///n/Ms6DHhgMHDtgtt9xioaGhFhYWZrfccoutXr061zqz98ec8tuHatasaX369HGLLV682Jo0aWIBAQEWExNjr776qj344IMWFBT0j32a04cffmjt27e3ChUqWGBgoNWtW9cefPDBfzyePPLIIybJ6tatm+8y33zzjXXs2NHCwsIsKCjIYmJirG/fvm7Hvj59+li5cuXyfP769eutXbt2FhwcbBUrVrQ77rjD1qxZU6A+zKuvAADFl8vMQ3dVBAAAwFmXlZWlSpUqqWvXrnrllVd83Rx40XXXXad169bleQ8zAACKI+4pBQAAUESlpaXl
uhfSm2++qYSEBLVt29Y3jYJXHDlyxO3vTZs26dNPP2XcAQAlCjOlAAAAiqhvv/1WgwYNUvfu3RUREaFVq1bptdde07nnnquVK1cqICDA103EWVK1alX17dtXderU0bZt2zRt2jSlp6dr9erVqlevnq+bBwCAR3CjcwAAgCKqVq1aioqK0osvvqiEhASFh4erd+/eGjduHAWpEq5Tp0569913tWfPHgUGBiouLk5jxoyhIAUAKFGYKQUAAAAAAACv455SAAAAAAAA8DqKUgAAAAAAAPC6En9PqaysLO3atUshISFyuVy+bg4AAAAAAECJZmZKSUlRtWrV5OeX/3yoEl+U2rVrl6KionzdDAAAAAAAgH+VHTt2qEaNGvk+XuKLUiEhIZKOd0RoaKiPWwMAAAAAAFCyJScnKyoqyqnJ5KfEF6WyL9kLDQ2lKAUAAAAAAOAlp7qNEjc6BwAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXlfJ1AwDgn4xbvd/XTShWhjep6OsmAAAAAECBMFMKAAAAAAAAXkdRCgAAAAAAAF5HUQoAAAAAAABeR1EKAAAAAAAAXkdRCgAAAAAAAF7n06JUrVq15HK5cv3r37+/JCktLU39+/dXRESEgoOD1a1bN+3du9eXTQYAAAAAAIAH+LQo9csvv2j37t3Ov6+++kqS1L17d0nSoEGDtGjRIn3wwQdasmSJdu3apa5du/qyyQAAAAAAAPCAUr588UqVKrn9PW7cOMXExKhNmzZKSkrSa6+9ptmzZ+vyyy+XJM2cOVPnnnuufvzxR7Vo0cIXTQYAAAAAAIAH+LQolVNGRobefvttDR48WC6XSytXrtTRo0fVrl07Z5nY2FhFR0dr+fLl+Ral0tPTlZ6e7vydnJwsScrMzFRmZqYkyeVyyc/PT1lZWTIzZ9nsePZyp4r7+fnJ5XLlGZekrKysAsX9/f1lZnnGT25jfnFyIqeSmpP+t06X3NticuUdd/lJZmcUN0ly+UmW9b9XyfGaLpdc5t5fpxU/SzllZmay7ZETOZETOZETOZETOZETOZGTT3PK9bkuH0WmKPXhhx/q4MGD6tu3ryRpz549CggIUPny5d2Wq1Klivbs2ZPvesaOHavRo0fniq9bt07BwcGSpPDwcEVHR2vnzp1KSEhwlomMjFRkZKTi4+OVkpLixKOiohQREaFNmzYpLS3NidepU0ehoaFav369W4fXr19fAQEBWrt2rVsbGjVqpIyMDG3YsMGJ+fv7q1GjRkpJSdGWLVuceFBQkGJjY5WYmKgdO3Y48ZCQEMXExGjfvn1u/UBO5FRScyqVWUmZ/qVVJfFEGyVpb4U68s88qorJJ9po8tPe8DoKOHpE4am7nPgxvwDtLx+tMukpCju8z4mnlyqrxNBqCj6SqOC0E20/EhCqpODKCju0X2Uykp14alC4UsuGq3zKHgUeO+zEk8pW1pGgUEUk7VSprAwnnhBcTRkBZVU5MV4unTjI7w+NOms5xcensO2REzmREzmREzmREzmREzmRk09zSk1NVUG47OSyl4907NhRAQEBWrRokSRp9uzZ6tevn9usJ0lq3ry5LrvsMo0fPz7P9eQ1UyoqKkoJCQkKDQ2V9O+pTJITOZWEnJ5Zc/ygxkypgrV9SOMItj1yIidyIidyIidyIidyIidy8mlOycnJCg8PV1JSklOLyUuRmCm1bds2ff3115o/f74Ti4yMVEZGhg4ePOg2W2rv3r2KjIzMd12BgYEKDAzMFff395e/v79bLHtQ8lrW23GXy5VnPL82FjZOTuSUX7zI5+Q6
Xqgxt/LQCXnGXS4Pxf1OKgP97zVdeeda6PhZyCm7/9j2yMlTcXIiJ0+1sbBxciInT7WxsHFyIidPtbGwcXIiJ0+1sbDxs5FTfq+f6zkFWuosmzlzpipXrqzOnTs7sWbNmql06dJavHixE9uwYYO2b9+uuLg4XzQTAAAAAAAAHuLzmVJZWVmaOXOm+vTpo1KlTjQnLCxMt912mwYPHqzw8HCFhoZqwIABiouL45f3AAAAAAAAijmfF6W+/vprbd++XbfeemuuxyZOnCg/Pz9169ZN6enp6tixo6ZOneqDVgIAAAAAAMCTisyNzs+W5ORkhYWFnfLmWgCKpnGr9/u6CcXK8CYVfd0EAAAAAP9yBa3FFIl7SgEAAAAAAODfhaIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvK6UrxsAAAAAAMDZNm71fl83odgY3qSir5uAfwlmSgEAAAAAAMDrKEoBAAAAAADA63xelPrrr7/0f//3f4qIiFCZMmXUqFEjrVixwnnczPT444+ratWqKlOmjNq1a6dNmzb5sMUAAAAAAAA4Uz4tSiUmJqpVq1YqXbq0PvvsM61fv17PPfecKlSo4CwzYcIEvfjii5o+fbp++uknlStXTh07dlRaWpoPWw4AAAAAAIAz4dMbnY8fP15RUVGaOXOmE6tdu7bzfzPTpEmT9Oijj+raa6+VJL355puqUqWKPvzwQ910001ebzMAAAAAAADOnE+LUh999JE6duyo7t27a8mSJapevbruvfde3XHHHZKkrVu3as+ePWrXrp3znLCwMF188cVavnx5nkWp9PR0paenO38nJydLkjIzM5WZmSlJcrlc8vPzU1ZWlszMWTY7nr3cqeJ+fn5yuVx5xiUpKyurQHF/f3+ZWZ7xk9uYX5ycyKmk5qT/rdMl97aYXHnHXX6S2RnFTZJcfpJl/e9VcrymyyWXuffXacXPUk6ZmZlse+RETuRETuRETuRETnnEc56TFefzvfziHs1JYtsjpzPKKdfnunz4tCi1ZcsWTZs2TYMHD9bDDz+sX375Rffff78CAgLUp08f7dmzR5JUpUoVt+dVqVLFeexkY8eO1ejRo3PF161bp+DgYElSeHi4oqOjtXPnTiUkJDjLREZGKjIyUvHx8UpJSXHiUVFRioiI0KZNm9wuG6xTp45CQ0O1fv16tw6vX7++AgICtHbtWrc2NGrUSBkZGdqwYYMT8/f3V6NGjZSSkqItW7Y48aCgIMXGxioxMVE7duxw4iEhIYqJidG+ffvc+oCcyKmk5lQqs5Iy/UurSuKJNkrS3gp15J95VBWTT7TR5Ke94XUUcPSIwlN3OfFjfgHaXz5aZdJTFHZ4nxNPL1VWiaHVFHwkUcFpJ9p+JCBUScGVFXZov8pkJDvx1KBwpZYNV/mUPQo8dtiJJ5WtrCNBoYpI2qlSWRlOPCG4mjICyqpyYrxcOnGQ3x8addZyio9PYdsjJ3IiJ3IiJ3IiJ3LKI6cqifFO
vDif73njHFYS2x45nVFOqampKgiXnVz28qKAgABdeOGFWrZsmRO7//779csvv2j58uVatmyZWrVqpV27dqlq1arOMj169JDL5dJ7772Xa515zZSKiopSQkKCQkNDJf17KpPkRE4lIadn1hw/qBXbb5m8/M3ZkMYRbHvkRE7kRE7kRE7kRE55xJ/5db8TK87ne/nFPZnT8KaV2PbI6YxySk5OVnh4uJKSkpxaTF58OlOqatWqatCggVvs3HPP1bx58yQdrxRK0t69e92KUnv37tUFF1yQ5zoDAwMVGBiYK+7v7y9/f3+3WPag5LWst+MulyvPeH5tLGycnMgpv3iRz8l1/C3V3N5aT8gz7nJ5KO530tv//17TlXeuhY6fhZyy+49tj5w8FScncvJUGwsbJydy8lQbCxsnp5KbU57nZMXwfO/Ucc/kxLZHTmeSU36vn+s5BVrqLGnVqpXb1DFJ2rhxo2rWrCnp+E3PIyMjtXjxYufx5ORk/fTTT4qLi/NqWwEAAAAAAOA5Pp0pNWjQILVs2VJjxoxRjx499PPPP2vGjBmaMWOGpOPVuoEDB+qpp55SvXr1VLt2bT322GOqVq2arrvuOl82HQAAAAAAAGfAp0Wpiy66SAsWLNCIESP0xBNPqHbt2po0aZJ69erlLPPQQw/p0KFDuvPOO3Xw4EG1bt1an3/+uYKCgnzYcgAAAAAAAJwJn97o3BuSk5MVFhZ2yptrASiaxq3ef+qF4BjepKKvmwAAAFAkcV5ZcJxT4kwVtBbj03tKAQAAAAAA4N+JohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8rpSvGwAAAAAAAEqmcav3+7oJxcrwJhV93QSvYqYUAAAAAAAAvI6iFAAAAAAAALyOy/cAAChCmOJeOP+2Ke4AAAAlCTOlAAAAAAAA4HUUpQAAAAAAAOB1FKUAAAAAAADgdRSlAAAAAAAA4HUUpQAAAAAAAOB1FKUAAAAAAADgdRSlAAAAAAAA4HUUpQAAAAAAAOB1FKUAAAAAAADgdRSlAAAAAAAA4HUUpQAAAAAAAOB1FKUAAAAAAADgdRSlAAAAAAAA4HUUpQAAAAAAAOB1FKUAAAAAAADgdRSlAAAAAAAA4HUUpQAAAAAAAOB1FKUAAAAAAADgdRSlAAAAAAAA4HUUpQAAAAAAAOB1FKUAAAAAAADgdRSlAAAAAAAA4HUUpQAAAAAAAOB1Pi1KjRo1Si6Xy+1fbGys83haWpr69++viIgIBQcHq1u3btq7d68PWwwAAAAAAABP8PlMqYYNG2r37t3Ovx9++MF5bNCgQVq0aJE++OADLVmyRLt27VLXrl192FoAAAAAAAB4QimfN6BUKUVGRuaKJyUl6bXXXtPs2bN1+eWXS5Jmzpypc889Vz/++KNatGjh7aYCAAAAAADAQ3xelNq0aZOqVaumoKAgxcXFaezYsYqOjtbKlSt19OhRtWvXzlk2NjZW0dHRWr58eb5FqfT0dKWnpzt/JycnS5IyMzOVmZkpSXK5XPLz81NWVpbMzFk2O5693Knifn5+crlcecYlKSsrq0Bxf39/mVme8ZPbmF+cnMippOak/63TJfe2mFx5x11+ktkZxU2SXH6SZf3vVXK8pssll7n312nFz1JOmZmZbHvFPCdJxXLbyy9+1ven//Uf2x45kRM5kRM5nSqnnO8hxfl8L7+4R3OSPDZOudZflM4jiuA4FZf96VTHiFyf6/Lh06LUxRdfrFmzZql+/fravXu3Ro8erUsuuUS///679uzZo4CAAJUvX97tOVWqVNGePXvyXefYsWM1evToXPF1
69YpODhYkhQeHq7o6Gjt3LlTCQkJzjKRkZGKjIxUfHy8UlJSnHhUVJQiIiK0adMmpaWlOfE6deooNDRU69evd+vw+vXrKyAgQGvXrnVrQ6NGjZSRkaENGzY4MX9/fzVq1EgpKSnasmWLEw8KClJsbKwSExO1Y8cOJx4SEqKYmBjt27fPrR/IiZxKak6lMisp07+0qiSeaKMk7a1QR/6ZR1Ux+UQbTX7aG15HAUePKDx1lxM/5heg/eWjVSY9RWGH9znx9FJllRhaTcFHEhWcdqLtRwJClRRcWWGH9qtMRrITTw0KV2rZcJVP2aPAY4edeFLZyjoSFKqIpJ0qlZXhxBOCqykjoKwqJ8bLpRMH+f2hUWctp/j4FLa9Yp6TVLZYbnu+2p/S0kLY9siJnMiJnMipQDlVSYx34sX5fM8b77mSPDZOOfugqJ1HFMVxKi7706mOEampqSoIl51c9vKhgwcPqmbNmnr++edVpkwZ9evXz23WkyQ1b95cl112mcaPH5/nOvKaKRUVFaWEhASFhoZKKjmV/pL47QU5kdPJ8WfWHD+oFaVvL4ryNzJDGkew7RXznCasSSiW215+8bO9Pz3UpJIktj1yIidyIidyOnVOz/y634kV5/O9/OKezGl400oeG6cJq/8uEjkVl3Eaen4Ft3UU1f3pVMeI5ORkhYeHKykpyanF5MXnl+/lVL58eZ1zzjnavHmz2rdvr4yMDB08eNBtttTevXvzvAdVtsDAQAUGBuaK+/v7y9/f3y2WPSh5LevtuMvlyjOeXxsLGycncsovXuRzch0/VJvbIfuEPOMul4fifie9rfzvNV1551ro+FnIKbv/2PaKd07Fcds7dfzs5JR9ySPbHjmREzl5Kk5OJTenPN9beM/NN+6pccq734vGeYQTL0LjVFz2p1PF83v9XM8p0FJekpqaqj///FNVq1ZVs2bNVLp0aS1evNh5fMOGDdq+fbvi4uJ82EoAAAAAAACcKZ/OlBoyZIiuvvpq1axZU7t27dLIkSPl7++vm2++WWFhYbrttts0ePBghYeHKzQ0VAMGDFBcXBy/vAcAAAAAAFDM+bQotXPnTt188806cOCAKlWqpNatW+vHH39UpUrH7w8xceJE+fn5qVu3bkpPT1fHjh01depUXzYZAAAAAAAAHuDTotScOXP+8fGgoCBNmTJFU6ZM8VKLAAAAAAAA4A1F6p5SAAAAAAAA+HegKAUAAAAAAACvoygFAAAAAAAAr6MoBQAAAAAAAK+jKAUAAAAAAACvoygFAAAAAAAAr6MoBQAAAAAAAK+jKAUAAAAAAACvoygFAAAAAAAAr6MoBQAAAAAAAK+jKAUAAAAAAACvoygFAAAAAAAAr6MoBQAAAAAAAK+jKAUAAAAAAACvoygFAAAAAAAAr6MoBQAAAAAAAK+jKAUAAAAAAACvoygFAAAAAAAAr6MoBQAAAAAAAK+jKAUAAAAAAACvoygFAAAAAAAAr6MoBQAAAAAAAK+jKAUAAAAAAACvoygFAAAAAAAAr6MoBQAAAAAAAK+jKAUAAAAAAACvoygFAAAAAAAAr6MoBQAAAAAAAK+jKAUAAAAAAACvoygFAAAAAAAAr6MoBQAAAAAAAK8r5esGoHDGrd7v6yYUK8ObVPR1EwAAAAAAQB6YKQUAAAAAAACvoygFAAAAAAAAr6MoBQAAAAAAAK+jKAUAAAAAAACvoygFAAAAAAAAr6MoBQAAAAAAAK+jKAUAAAAAAACvoygFAAAAAAAAryt0UWrHjh3auXOn8/fPP/+sgQMHasaMGR5tGAAAAAAAAEquQhelevbsqW+++UaStGfPHrVv314///yzHnnkET3xxBMebyAAAAAAAABKnkIXpX7//Xc1b95ckvT+++/rvPPO07Jly/TOO+9o1qxZnm4fAAAAAAAASqBCF6WOHj2qwMBASdLXX3+ta665RpIUGxur3bt3n3ZDxo0bJ5fLpYEDBzqxtLQ09e/fXxEREQoO
Dla3bt20d+/e034NAAAAAAAAFA2FLko1bNhQ06dP1/fff6+vvvpKnTp1kiTt2rVLERERp9WIX375RS+//LLOP/98t/igQYO0aNEiffDBB1qyZIl27dqlrl27ntZrAAAAAAAAoOgodFFq/Pjxevnll9W2bVvdfPPNaty4sSTpo48+ci7rK4zU1FT16tVLr7zyiipUqODEk5KS9Nprr+n555/X5ZdfrmbNmmnmzJlatmyZfvzxx0K/DgAAAAAAAIqOUoV9Qtu2bbV//34lJye7FZHuvPNOlS1bttAN6N+/vzp37qx27drpqaeecuIrV67U0aNH1a5dOycWGxur6OhoLV++XC1atCj0awEAAAAAAKBoKHRRSpLMTCtXrtSff/6pnj17KiQkRAEBAYUuSs2ZM0erVq3SL7/8kuuxPXv2KCAgQOXLl3eLV6lSRXv27Ml3nenp6UpPT3f+Tk5OliRlZmYqMzNTkuRyueTn56esrCyZmbNsdjx7uVPF/fz85HK58oxLUlZWVoHi/v7+MrM847naaFkyl59kJpdOxE2SXH6SZcmVYx0ml+RyyWXu6z6tuOT2mv8Yz6ONhY17IqfMzEyfjFN+8eK87fkqJ/1vncVt2ztl/CzllJmZybZXzHOSVCy3vfziZ31/+l//se2REzmREzmR06lyyvkeUpzP9/KLezQnyWPjlGv9Rek8ogiOU3HZn051jMj1uS4fhS5Kbdu2TZ06ddL27duVnp6u9u3bKyQkROPHj1d6erqmT59eoPXs2LFDDzzwgL766isFBQUVthn5Gjt2rEaPHp0rvm7dOgUHB0uSwsPDFR0drZ07dyohIcFZJjIyUpGRkYqPj1dKSooTj4qKUkREhDZt2qS0tDQnXqdOHYWGhmr9+vVuHV6/fn0FBARo7dq1bm1o1KiRMjIytGHDBifm7++vRo0aKSUlRVu2bHHiQUFBio2NVWJionbs2OHEy6f4KTG0moKPJCo47UTbjwSEKim4ssIO7VeZjGQnnhoUrtSy4SqfskeBxw478aSylXUkKFQRSTtVKivDiScEV1NGQFlVToyXSyc2yv2hUcr0L60qiSfaKEl7K9SRf+ZRVUw+0UaTn/aG11HA0SMKT93lxI/5BWh/+WiVSU9R2OF9Tjy9VNmzltPatbt8Mk4hISGKiYnRvn373IqoxXnb81VOpTIrFcttT/LN/hQfn8K2V8xzksoWy23PV/tTWloI2x45kRM5kRM5FSinKonxTrw4n+954z1XksfGKWcfFLXziKI4TsVlfzrVMSI1NVUF4bKTy16ncN111ykkJESvvfaaIiIitGbNGtWpU0fffvut7rjjDm3atKlA6/nwww91/fXXy9/f34llZmY6FbYvvvhC7dq1U2JiottsqZo1a2rgwIEaNGhQnuvNa6ZUVFSUEhISFBoaejzpYlzpf3bNASrIhWj7kMYRRb6CnDNelLc9X+X0zJrjB7Xitu2dMn6WchrSOIJtr5jnNGFNQrHc9vKLn+396aEmlSSx7ZETOZETOZHTqXN65tf9Tqw4n+/lF/dkTsObVvLYOE1Y/XeRyKm4jNPQ8yu4raOo7k+nOkYkJycrPDxcSUlJTi0mL4WeKfX9999r2bJlCggIcIvXqlVLf/31V4HXc8UVV+Sq3PXr10+xsbEaNmyYoqKiVLp0aS1evFjdunWTJG3YsEHbt29XXFxcvusNDAxUYGBgrri/v79bAUw6MSh5LevtuMvlyjN+chvN5Zf9BGdHcV+R30m7wUnPO9N4Xq+ZXzzfNhY2fvo55exTb47T6caL8rZ3uvEzbqPr+DZR3La9AsXPQk7Z/ce2V7xzKo7b3qnjZyen7Ese2fbIiZzIyVNxciq5OeX53sJ7br5xT41T3v1eNM4jnHgRGqfisj+dKp7f65+s0EWprKysPK8N3Llzp0JCQgq8npCQEJ133nlusXLlyikiIsKJ33bbbRo8eLDCw8MVGhqqAQMGKC4ujpucAwAAAAAA
FHP5fEWbvw4dOmjSpEnO3y6XS6mpqRo5cqSuuuoqT7ZNEydOVJcuXdStWzddeumlioyM1Pz58z36GgAAAAAAAPC+Qs+Ueu6559SxY0c1aNBAaWlp6tmzpzZt2qSKFSvq3XffPaPGfPvtt25/BwUFacqUKZoyZcoZrRcAAAAAAABFS6GLUjVq1NCaNWs0Z84c/fbbb0pNTdVtt92mXr16qUyZMmejjQAAAAAAAChhCl2UkqRSpUrp//7v/zzdFgAAAAAAAPxLFLoo9eabb/7j47179z7txgAAAAAAAODfodBFqQceeMDt76NHj+rw4cMKCAhQ2bJlKUoBAAAAAADglApdlEpMTMwV27Rpk+655x4NHTrUI40CAAAAcHaNW73f100oNoY3qejrJgBAieTniZXUq1dP48aNyzWLCgAAAAAAAMiLR4pS0vGbn+/atctTqwMAAAAAAEAJVujL9z766CO3v81Mu3fv1uTJk9WqVSuPNQwAAAAAAAAlV6GLUtddd53b3y6XS5UqVdLll1+u5557zlPtAgAAAAAAQAlW6KJUVlbW2WgHAAAAAAAA/kU8dk8pAAAAAAAAoKAKNFNq8ODBBV7h888/f9qNAQAAAAAAwL9DgYpSq1evLtDKXC7XGTUGAAAAAAAA/w4FKkp98803Z7sdAAAAAAAA+BfhnlIAAAAAAADwukL/+p4krVixQu+//762b9+ujIwMt8fmz5/vkYYBAAAAAACg5Cr0TKk5c+aoZcuW+u9//6sFCxbo6NGjWrdunf7zn/8oLCzsbLQRAAAAAAAAJUyhi1JjxozRxIkTtWjRIgUEBOiFF17QH3/8oR49eig6OvpstBEAAAAAAAAlTKGLUn/++ac6d+4sSQoICNChQ4fkcrk0aNAgzZgxw+MNBAAAAAAAQMlT6KJUhQoVlJKSIkmqXr26fv/9d0nSwYMHdfjwYc+2DgAAAAAAACVSgYtS2cWnSy+9VF999ZUkqXv37nrggQd0xx136Oabb9YVV1xxdloJAAAAAACAEqXAv753/vnn66KLLtJ1112n7t27S5IeeeQRlS5dWsuWLVO3bt306KOPnrWGAgAAAAAAoOQocFFqyZIlmjlzpsaOHaunn35a3bp10+23367hw4efzfYBAAAAAACgBCrw5XuXXHKJXn/9de3evVsvvfSS4uPj1aZNG51zzjkaP3689uzZczbbCQAAAAAAgBKk0Dc6L1eunPr166clS5Zo48aN6t69u6ZMmaLo6Ghdc801Z6ONAAAAAAAAKGEKXZTKqW7dunr44Yf16KOPKiQkRJ988omn2gUAAAAAAIASrMD3lDrZd999p9dff13z5s2Tn5+fevToodtuu82TbQMAAAAAAEAJVaii1K5duzRr1izNmjVLmzdvVsuWLfXiiy+qR48eKleu3NlqIwAAAAAAAEqYAhelrrzySn399deqWLGievfurVtvvVX169c/m20DAAAAAABACVXgolTp0qU1d+5cdenSRf7+/mezTQAAAAAAACjhClyU+uijj85mOwAAAAAAAPAvcka/vgcAAAAAAACcDopSAAAAAAAA8DqKUgAAAAAAAPA6ilIAAAAAAADwOopSAAAAAAAA8DqKUgAAAAAAAPC6Ur5uAACgaBq3er+vm1BsDG9S0ddNAAAAAIodZkoBAAAAAADA65gpBRQAM0YKh1kjAAAAAIBTYaYUAAAAAAAAvI6ZUgAAAGJWbGEwIxYAAHgCM6UAAAAAAADgdRSlAAAAAAAA4HU+LUpNmzZN559/vkJDQxUaGqq4uDh99tlnzuNpaWnq37+/IiIiFBwcrG7dumnv3r0+bDEAAAAAAAA8wadFqRo1amjcuHFauXKlVqxYocsvv1zXXnut1q1bJ0kaNGiQFi1apA8++EBLlizRrl271LVrV182GQAAAAAAAB7g0xudX3311W5/P/3005o2bZp+/PFH1ahRQ6+99ppmz56tyy+/XJI0c+ZMnXvuufrxxx/V
okULXzQZAAAAAAAAHlBk7imVmZmpOXPm6NChQ4qLi9PKlSt19OhRtWvXzlkmNjZW0dHRWr58uQ9bCgAAAAAAgDPl05lSkrR27VrFxcUpLS1NwcHBWrBggRo0aKBff/1VAQEBKl++vNvyVapU0Z49e/JdX3p6utLT052/k5OTJR0vemVmZkqSXC6X/Pz8lJWVJTNzls2OZy93qrifn59cLleecUnKysoqUNzf319mlmc8VxstS+byk8zk0om4SZLLT7IsuXKsw+SSXC65zH3dpxWX3F7zH+N5tLGwcU/klJmZ6ZFxyq/ffZHTKePy/ThlZmZ6bH/S/7Z/X+fkvGYRH6fMzMy8jx2ncdyTVCRyyi9elMYpu98LeizPL57d70UhJ6mYjNP/+s8T77lFJqdiME5ZWVlF5twov3hxPt/zVk7Fcdvz1f4kiW2vBOSUc/soLtuez/YnyWPjlGv9JfAY4cmcisv+dKpjRK7PdfnweVGqfv36+vXXX5WUlKS5c+eqT58+WrJkyWmvb+zYsRo9enSu+Lp16xQcHCxJCg8PV3R0tHbu3KmEhARnmcjISEVGRio+Pl4pKSlOPCoqShEREdq0aZPS0tKceJ06dRQaGqr169e7dXj9+vUVEBCgtWvXurWhUaNGysjI0IYNG5yYv7+/GjVqpJSUFG3ZssWJBwUFKTY2VomJidqxY4cTL5/ip8TQago+kqjgtBNtPxIQqqTgygo7tF9lMpKdeGpQuFLLhqt8yh4FHjvsxJPKVtaRoFBFJO1UqawMJ54QXE0ZAWVVOTFeLp3YKPeHRinTv7SqJJ5ooyTtrVBH/plHVTH5RBtNftobXkcBR48oPHWXEz/mF6D95aNVJj1FYYf3OfH0UmXPWk5r1+7yyDhVTswoMjlJRX+c1q7d5bH9qVRmpSKRk1Q8xik+PkUxMTHat2+fWwH/dI57kopETsVhnNav31uoY3lISEi+4ySVLRI5FZdxSksL8dh7blHJqTiMU2Kif5E5N/qn/am4nu95K6fiuO35an+SKrPtlYCcqiTGO/Hisu35an+S5LFxytkHJfUY4cmcisv+dKpjRGpqqgrCZSeXvXysXbt2iomJ0Y033qgrrrhCiYmJbrOlatasqYEDB2rQoEF5Pj+vmVJRUVFKSEhQaGiopOJd6X92zQEqyIVo+5DGER4Zp/z63Rc5nTIu34/TkMYRHtufnlmTUCRycl6ziI/TkMYRHvuGc8KahCKRU37xojRO2f3uiW+ZJqxJKBI5ScVjnB5qUkmSZ95zx636u0jkVBzGaegFFYvMuVF+8eJ8vuetnMav2ucWLw7bnq/2p2FNK7PtlYCcnvl1vxMrLtuer/an4U0reWycJqz+u0jkVFzGaej5FdzWUVT3p1MdI5KTkxUeHq6kpCSnFpMXn8+UOllWVpbS09PVrFkzlS5dWosXL1a3bt0kSRs2bND27dsVFxeX7/MDAwMVGBiYK+7v7398an4O2YOS17LejrtcrjzjJ7fRXH7ZT3B2FPcV+Z20G5z0vDON5/Wa+cXzbWNh46efU84+PZNxOnW/ey+nAsV9PE6e6vfs15N8n5Pbaxbhccruv/yOb4U97hWFnE4d9/04ZfdfQY/lp4oXhZxOtKVoj1P2JY+eeM8tKjk58SI8TtnbblE4NzrdeFE+3zvdeGHbWBy3vdOOeyAntr3in1Oe20cx2PZ8tT95apzy7veSd4zwVE7FZX86VTy/1z+ZT4tSI0aM0JVXXqno6GilpKRo9uzZ+vbbb/XFF18oLCxMt912mwYPHqzw8HCFhoZqwIABiouL45f3AAAAAAAAijmfFqX27dun3r17a/fu3QoLC9P555+vL774Qu3bt5ckTZw4UX5+furWrZvS09PVsWNHTZ061ZdNBgAAAAAAgAf4tCj12muv/ePjQUFBmjJliqZMmeKlFgEAAAAAAMAb8rmZBQAAAAAAAHD2UJQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAA
AIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA11GUAgAAAAAAgNdRlAIAAAAAAIDXUZQCAAAAAACA1/m0KDV27FhddNFFCgkJUeXKlXXddddpw4YNbsukpaWpf//+ioiIUHBwsLp166a9e/f6qMUAAAAAAADwBJ8WpZYsWaL+/fvrxx9/1FdffaWjR4+qQ4cOOnTokLPMoEGDtGjRIn3wwQdasmSJdu3apa5du/qw1QAAAAAAADhTpXz54p9//rnb37NmzVLlypW1cuVKXXrppUpKStJrr72m2bNn6/LLL5ckzZw5U+eee65+/PFHtWjRwhfNBgAAAAAAwBnyaVHqZElJSZKk8PBwSdLKlSt19OhRtWvXzlkmNjZW0dHRWr58eZ5FqfT0dKWnpzt/JycnS5IyMzOVmZkpSXK5XPLz81NWVpbMzFk2O5693Knifn5+crlcecYlKSsrq0Bxf39/mVme8VxttCyZy08yk0sn4iZJLj/JsuTKsQ6TS3K55DL3dZ9WXHJ7zX+M59HGwsY9kVNmZqZHxim/fvdFTqeMy/fjlJmZ6bH9Sf/b/n2dk/OaRXycMjMz8z52nMZxT1KRyCm/eFEap+x+L+ixPL94dr8XhZykYjJO/+s/T7znFpmcisE4ZWVlFZlzo/zixfl8z1s5Fcdtz1f7kySPjdP4VfuKRE7FZZyGNanosf0p5+sWl23PZ+Mkeey4l2v9xWTbyzPuhXEqKe9PuT7X5aPIFKWysrI0cOBAtWrVSuedd54kac+ePQoICFD58uXdlq1SpYr27NmT53rGjh2r0aNH54qvW7dOwcHBko4XvaKjo7Vz504lJCQ4y0RGRioyMlLx8fFKSUlx4lFRUYqIiNCmTZuUlpbmxOvUqaPQ0FCtX7/ercPr16+vgIAArV271q0NjRo1UkZGhtt9s/z9/dWoUSOlpKRoy5YtTjwoKEixsbFKTEzUjh07nHj5FD8lhlZT8JFEBaedaPuRgFAlBVdW2KH9KpOR7MRTg8KVWjZc5VP2KPDYYSeeVLayjgSFKiJpp0plZTjxhOBqyggoq8qJ8XLpxEa5PzRKmf6lVSXxRBslaW+FOvLPPKqKySfaaPLT3vA6Cjh6ROGpu5z4Mb8A7S8frTLpKQo7fOLNOL1U2bOW09q1uzwyTpUTM4pMTlLRH6e1a3d5bH8qlVmpSOQkFY9xio9PUUxMjPbt2+d2nDyd456kIpFTcRin9ev3FupYHhISku84SWWLRE7FZZzS0kI89p5bVHIqDuOUmOjvsXOjiav25J1TxuG8c0pLzjunwwl555S6L8+cKiTvynOcKh7cnuc4VUnYcubj5IGc7rmkgcfOYYvjtuer/UmqXKhj+T+95xaVnIrLOKWlpZ3R56ec41QlMb5I5FQcxkmSxz7n5uyD4rTt+WqcinI9ojDHvdTUVBWEy04ue/nIPffco88++0w//PCDatSoIUmaPXu2+vXr5zbzSZKaN2+uyy67TOPHj8+1nrxmSkVFRSkhIUGhoaGSivc3Z8+uOUAFuRBtH9I4
wiPjlF+/+yKnU8bl+3Ea0jjCY/vTM2sSikROzmsW8XEa0jjCY9+uT1iTUCRyyi9elMYpu9898S3ThDUJRSInqXiM00NNKknyzHvuuFV/F4mcisM4Db2gosfOjcav3l8kciou4zS8aWWPncPmOWPHBzkVh3Ealke/S8yUKm4zpZ75dX+RyCnPeBEbp+FNK3nsc+6E1X8XiZyKyzgNPb+C2zqKUj0iv3hex73k5GSFh4crKSnJqcXkpUjMlLrvvvv08ccf67vvvnMKUtLxbxMyMjJ08OBBt9lSe/fuVWRkZJ7rCgwMVGBgYK64v7//8an5OWQPSl7LejvucrnyjJ/cRnP5ZT/B2VHcV+R30m5w0vPONJ7Xa+YXz7eNhY2ffk45+/RMxunU/e69nAoU9/E4earfs19P8n1Obq9ZhMcpu//yO74V9rhXFHI6ddz345TdfwU9lp8qXhRyOtGWoj1O2Zc8euI9t6jk5MSL8Dhlb7seOTfyWNv/PePkqXPYopRTcRinwh7L84sXpZyKwzjld8w+nffcPF+3GGx7vhonT33Ozbvfi/62l2/8LI9TUa5HFCae3+vnek6BljpLzEz33XefFixYoP/85z+qXbu22+PNmjVT6dKltXjxYie2YcMGbd++XXFxcd5uLgAAAAAAADzEpzOl+vfvr9mzZ2vhwoUKCQlxrkkMCwtTmTJlFBYWpttuu02DBw9WeHi4QkNDNWDAAMXFxfHLewAAAAAAAMWYT4tS06ZNkyS1bdvWLT5z5kz17dtXkjRx4kT5+fmpW7duSk9PV8eOHTV16lQvtxQAAAAAAACe5NOiVEHusR4UFKQpU6ZoypQpXmgRAAAAAAAAvMGn95QCAAAAAADAvxNFKQAAAAAAAHgdRSkAAAAAAAB4HUUpAAAAAAAAeB1FKQAAAAAAAHgdRSkAAAAAAAB4HUUpAAAAAAAAeB1FKQAAAAAAAHgdRSkAAAAAAAB4HUUpAAAAAAAAeB1FKQAAAAAAAHgdRSkAAAAAAAB4HUUpAAAAAAAAeB1FKQAAAAAAAHgdRSkAAAAAAAB4HUUpAAAAAAAAeB1FKQAAAAAAAHgdRSkAAAAAAAB4HUUpAAAAAAAAeB1FKQAAAAAAAHgdRSkAAAAAAAB4HUUpAAAAAAAAeB1FKQAAAAAAAHgdRSkAAAAAAAB4HUUpAAAAAAAAeB1FKQAAAAAAAHgdRSkAAAAAAAB4HUUpAAAAAAAAeB1FKQAAAAAAAHgdRSkAAAAAAAB4HUUpAAAAAAAAeB1FKQAAAAAAAHgdRSkAAAAAAAB4HUUpAAAAAAAAeB1FKQAAAAAAAHgdRSkAAAAAAAB4HUUpAAAAAAAAeB1FKQAAAAAAAHgdRSkAAAAAAAB4HUUpAAAAAAAAeB1FKQAAAAAAAHgdRSkAAAAAAAB4HUUpAAAAAAAAeB1FKQAAAAAAAHidT4tS3333na6++mpVq1ZNLpdLH374odvjZqbHH39cVatWVZkyZdSuXTtt2rTJN40FAAAAAACAx/i0KHXo0CE1btxYU6ZMyfPxCRMm6MUXX9T06dP1008/qVy5curYsaPS0tK83FIAAAAAAAB4UilfvviVV16pK6+8Ms/HzEyTJk3So48+qmuvvVaS9Oabb6pKlSr68MMPddNNN3mzqQAAAAAAAPAgnxal/snWrVu1Z88etWvXzomFhYXp4osv1vLly/MtSqWnpys9Pd35Ozk5WZKUmZmpzMxMSZLL5ZKfn5+ysrJkZs6y2fHs5U4V9/Pzk8vlyjMuSVlZWQWK+/v7y8zyjOdqo2XJXH6SmVw6ETdJcvlJliVXjnWYXJLLJZe5r/u04pLba/5jPI82FjbuiZwyMzM9Mk759bsvcjplXL4fp8zMTI/tT/rf9u/rnJzXLOLjlJmZmfex4zSOe5KKRE75xYvSOGX3e0GP5fnFs/u9KOQkFZNx+l//eeI9t8jkVAzGKSsry3PnRmfYdk/lVGzGSfLYOWyRyakYjJOU
u9+l/I/l//SeW1RyKi7jlN8x+3Tec3O+bnHZ9nw2TpLHPufmWn8x2fbyjHthnIpyPSK/eF7HvVyf6/JRZItSe/bskSRVqVLFLV6lShXnsbyMHTtWo0ePzhVft26dgoODJUnh4eGKjo7Wzp07lZCQ4CwTGRmpyMhIxcfHKyUlxYlHRUUpIiJCmzZtcrt0sE6dOgoNDdX69evdOrx+/foKCAjQ2rVr3drQqFEjZWRkaMOGDU7M399fjRo1UkpKirZs2eLEg4KCFBsbq8TERO3YscOJl0/xU2JoNQUfSVRw2om2HwkIVVJwZYUd2q8yGclOPDUoXKllw1U+ZY8Cjx124kllK+tIUKgiknaqVFaGE08IrqaMgLKqnBgvl05slPtDo5TpX1pVEk+0UZL2Vqgj/8yjqph8oo0mP+0Nr6OAo0cUnrrLiR/zC9D+8tEqk56isMP7nHh6qbJnLae1a3d5ZJwqJ2YUmZykoj9Oa9fu8tj+VCqzUpHISSoe4xQfn6KYmBjt27fP7Vh5Osc9SUUip+IwTuvX7y3UsTwkJCTfcZLKFomciss4paWFeOw9t6jkVBzGKTHR32PnRqUyM4pETsVlnKTKHjuHLSo5FYdxkioX6lj+T++5RSWn4jJOaWlpZ/T5Kec4VUmMLxI5FYdxkuSxz7k5+6A4bXu+GqeiXI8ozHEvNTVVBeGyk8tePuJyubRgwQJdd911kqRly5apVatW2rVrl6pWreos16NHD7lcLr333nt5rievmVJRUVFKSEhQaGio81rFdabUs2sOUEEuRNuHNI7wyDjl1+++yOmUcfl+nIY0jvDY/vTMmoQikZPzmkV8nIY0jvDYTKkJaxKKRE75xYvSOGX3uye+ZZqwJqFI5CQVj3F6qEklSZ55zx236u8ikVNxGKehF1T02LnR+NX7i0ROxWWchjet7LFz2PGr9rnFi8O256txGpZHv0unN1Mqz34vBtuer8ZpWJOKHpvZ8cyv+4tETnnGi9g4DW9ayWOfcyes/rtI5FRcxmno+RXc1lGU6hH5xfM67iUnJys8PFxJSUlOLSYvRXamVGRkpCRp7969bkWpvXv36oILLsj3eYGBgQoMDMwV9/f3Pz41P4fsQclrWW/HXS5XnvGT22guv+wnODuK+4r8TtoNTnremcbzes384vm2sbDx088pZ5+eyTidut+9l1OB4j4eJ0/1e/brSb7Pye01i/A4Zfdffse3wh73ikJOp477fpyy+6+gx/JTxYtCTifaUrTHKfuSR0+85xaVnJx4ER6n7G3XI+dGHmv7v2ecPHUOW5RyKg7jVNhjeX7xopRTcRin/I7Zp/Oem+frFoNtz1fj5KnPuXn3e9Hf9vKNn+VxKsr1iMLE83v9XM8p0FI+ULt2bUVGRmrx4sVOLDk5WT/99JPi4uJ82DIAAAAAAACcKZ/OlEpNTdXmzZudv7du3apff/3VuR5x4MCBeuqpp1SvXj3Vrl1bjz32mKpVq+Zc4gcAAAAAAIDiyadFqRUrVuiyyy5z/h48eLAkqU+fPpo1a5YeeughHTp0SHfeeacOHjyo1q1b6/PPP1dQUJCvmgwAAAAAAAAP8GlRqm3btrlumpWTy+XSE088oSeeeMKLrQIAAAAAAMDZVmTvKQUAAAAAAICSi6IUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyO
ohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvI6iFAAAAAAAALyOohQAAAAAAAC8jqIUAAAAAAAAvK5YFKWmTJmiWrVqKSgoSBdffLF+/vlnXzcJAAAAAAAAZ6DIF6Xee+89DR48WCNHjtSqVavUuHFjdezYUfv27fN10wAAAAAAAHCainxR6vnnn9cdd9yhfv36qUGDBpo+fbrKli2r119/3ddNAwAAAAAAwGkq5esG/JOMjAytXLlSI0aMcGJ+fn5q166dli9fnudz0tPTlZ6e7vydlJQkSUpMTFRmZqYkyeVyyc/PT1lZWTIzZ9nsePZyp4r7+fnJ5XLlGZekrKysAsX9/f1lZnnGT25jekqSzOUnmcmlE3GTJJefZFly5ViHySW5XHKZ+7pPKy65veY/xvNoY2HjnsgpMdHfI+OUX7/7IqdTxuX7cUpM9PfY/pSWklwkcnJes4iPU2Kif57HjtM57qWlphSJnPKLF6Vxyu73gh7L84tn93tRyEkqHuOUlFRakmfec9NSkotETsVhnA4eLOWxc6OC9vvZzqm4jFNycoDHzmHTU5KKRE7FYZzy6ncp/2P5P73n5tnvxWDb89U4JSWVPqPPTznjOfu+uGx7vhqn5OQAj33OzbXNF5NtL8+4F8YpMdHfbR1FqR6RXzyv415y8vHPcSc//2QuO9USPrRr1y5Vr15dy5YtU1xcnBN/6KGHtGTJEv3000+5njNq1CiNHj3am80EAAAAAADASXbs2KEaNWrk+3iRnil1OkaMGKHBgwc7f2dlZSkhIUERERFyuVz/8EycruTkZEVFRWnHjh0KDQ31dXP+Neh336HvfYN+9x363jfod9+h732HvvcN+t136HvfoN/PPjNTSkqKqlWr9o/LFemiVMWKFeXv76+9e/e6xffu3avIyMg8nxMYGKjAwEC3WPny5c9WE5FDaGgoO7QP0O++Q9/7Bv3uO/S9b9DvvkPf+w597xv0u+/Q975Bv59dYWFhp1ymSN/oPCAgQM2aNdPixYudWFZWlhYvXux2OR8AAAAAAACKlyI9U0qSBg8erD59+ujCCy9U8+bNNWnSJB06dEj9+vXzddMAAAAAAABwmop8UerGG2/U33//rccff1x79uzRBRdcoM8//1xVqlTxddPwP4GBgRo5cmSuyyZxdtHvvkPf+wb97jv0vW/Q775D3/sOfe8b9Lvv0Pe+Qb8XHUX61/cAAAAAAABQMhXpe0oBAAAAAACgZKIoBQAAAAAAAK+jKAUAAAAAAACvoygFAACAf42srCxfNwEAAPwPRSkAAACUeB999JG2bt0qPz8/8Ts/AAAUDRSlkCe+RQSAki3nh3I+oHsX/e19q1ev1iOPPKIRI0Zo+/btcrlcjIOX0M/4N+KzFFBwFKWQS1ZWlvz8jm8av/zyiw4dOuTjFv27cPLmffmdODAWZx997DuJiYlKSUlRUlKSXC4XJ9Be5HK5JEmZmZk+bsm/R5MmTdS/f3/t3btXI0aM0LZt2yhMeUn29j5t2jTNmjXLt435l+G47jvZn6Vefvll/fHHH5I45/GG7D42M/q7GKEoBTc5C1KPPfaY/u///k/ffPONMjIyfNyyku3tt9/Wk08+KUmcJHtZzm3+559/1tKlS/Xzzz9LOnEijbMjKyvL6eO0tDQdPXrUeYx94Ox677331KNHD8XFxalNmzZavXq1/Pz8+ABzluXs32eeeUZ33nknX/x4QXa/33333br55pu1Y8cOPfzwwxSmvCgxMVGLFi3SL7/8IoliiTfkPL9ZuHChFi9e7OMW/fuYmZ566ik999xzkjivPNsyMjKcPj527BjH9mKEohTcZL95Pfro
o3rllVc0efJkXXzxxQoICPBxy0quhQsXqnfv3ho5cqQeffRRSRSmvCl7mx82bJh69uyp66+/Xt26ddNVV12l5ORkH7euZMvu+3HjxunKK6/UDTfcoBkzZkhiHzib3nzzTd1222269tprdc8996hhw4bq0qWLdu3a5YwJPC/nB8QVK1Zo//79mjlzpsaMGaO0tDQft65k8/Pzc2al3XnnnerVqxeFKS+rUKGCevbsqZkzZ+qPP/7gWHOWmZnb+c2QIUO0adMm/f333z5u2b9H9hdvI0eO1H//+19t3rzZ100qsb7++mtJcj6vjh8/XldddZU6d+6shx9+mPfYYoB3BOSyZcsWLVy4UK+++qrat2+vUqVK6Y8//tALL7ygb775RseOHfN1E0uM+Ph4zZw5U4MHD9b06dM1YcIEjRgxQhIfyr1p8uTJevXVV/Xmm2/q66+/1syZM7V582ZdddVVbtOA4Rk5vyF/9tln9eyzz6pFixYqW7ashg4dqscee0wS+8DZsHLlSk2YMEHTp0/XgAED1L9/fz366KMKCQnR77//7uvmlWjZHxAfeugh3XzzzUpPT1ebNm00YcIEDR06VOnp6T5uYcnm7+/v/P+uu+5ixtRZdHI/Zh/zu3fvrssuu0yvvvoq55JnWfZskXHjxmnWrFl64403dPfdd6tSpUo+blnJdfJ2n33Mb926tf78809mqp0lr732mrp3765XX31VkjRhwgSNGTNGTZs2Vf369fXyyy+rY8eO2rhxo49bin9SytcNQNFz+PBh7dmzR+XLl9eSJUs0Z84cLV++XLt371a1atU0cuRIXXfddb5uZokQFBSkpk2bqmPHjrr44osVGBioO+64Q5I0duxYpvmeJTlnLEjSqlWrdMstt6hly5ZO7LPPPlPbtm111113acaMGYyFB+WcLVK2bFnNnj1bHTp0UFJSkmbPnq0BAwZIkp588knngyL97xl79+5VxYoV1bp1ayd27rnnqkyZMtq0aZM6dOhAf59Fixcv1ssvv6xPP/1UrVq1Unp6uj788EP16dNHLpdL48ePV5kyZXzdzBIje1tevXq1fv31V5UtW1b16tVT06ZNdc8990iS3n33XT388MMaO3asoqOj2f49ILv/Jk6cqAYNGqhBgwaKiopSYGCg4uLiNHv2bD3xxBMqVaoU/e1BkyZN0nXXXadatWpJkg4cOKDPP/9cY8aMUcuWLbVt2zb997//1ezZs1WlShU988wzvm1wCZJzO164cKGOHDmim266SZIUGxur/v37a/LkyerUqZNq1qzpy6aWOHFxcerTp4+ee+45HTp0SAcOHNB7772nTp06STr+RdCll16qAQMG6IsvvvBxa5Evw79aZmZmnvH27dtb5cqVLSgoyO6//377+OOPLSMjw84991x79tlnvdzKki0xMdH5f3p6us2aNctKly5tw4YNc+IHDx60rVu3er9xJVBWVpbz/08//dTMzDp27GhXXXWVEz927JiZmY0dO9bi4uIsOTnZu438F/juu+/M5XJZeHi4fffdd048NTXVpk6daqVLl7bHHnvMhy0smRISEuyHH35w/k5PTzczsxYtWtj06dN91ax/jXnz5llMTEyuY8prr71mLpfLHn30UTty5IiPWleyZB/r582bZ5UrV7a4uDg777zz7NJLL7XZs2c7y02dOtUuu+wy69Kli23fvt1XzS0Rcp5THj582K655hqrUqWKXXrppfb000/b4cOHLT093Zo1a2bDhw/3YUtLno0bN5rL5bJevXrZjh07nHiHDh3s5ptvtvnz59s111xjrVu3tmuuucYqVKhg/fr182GLS6bffvvNWrZsaZGRkdaxY0d75513LDEx0TZu3GjNmjWzTz75xMxOnGfCMzZu3GgPPPCANWzY0CpWrOic52Sf42zYsMHCwsLsjTfe8GUz8Q+4fO9fLOdskY8++khvvPGGnn/+eaWlpenLL7/UrFmz9M033+iFF15Q586d
Vbp0aVWuXFmBgYE+bnnJUr58eef/AQEB6tWrl2bMmKHnn39eDz/8sA4fPqzOnTvrjTfe8F0jSwjL8U3WE088obvvvlvbt29Xz549FR8fr7lz50o6cZlHeHi40tPTuaTjLKhZs6ZGjRqltLQ0LV++3ImXK1dOvXv31ksvvaSnnnpKr7zyig9bWfJUqFBBrVq1knR8fyhV6viE6cDAQB0+fNiJ9+jRQ0uXLvVZO0uCvI4bNWrU0NatW50fU8heJi4uThUqVNDTTz+tkSNHerWdJZXL5dKSJUt07733atSoUVq2bJmeffZZrV69Wo888ohzqcc999yjLl26KDMzk/scnQHLcQ+jTz/9VGXKlNHChQv1wQcfqGvXrpo4caI6d+6s/v37q3Xr1tqwYYNSUlJ83OqSwcxUr149LV++XAsWLNCwYcO0fft2SVLnzp21Z88e9ezZUw0bNtSYMWO0cOFC3X333VxC6QH/+c9/NG/ePEnSgAED9N1332n+/Pn6/vvvFRQUpOnTp6tZs2bauHGjjhw5ohdffFGS++XEOD05bwVRr1493XrrrWrXrp0OHjyoJUuWSDr+uSozM1NVqlRRrVq1lJiY6Kvm4lR8Vw9DUTF06FCLioqyjh07Wt26dS02NtaZQWJ2fObCli1b7KqrrrLzzz/fjh496sPW/jscPXrU3nzzTStTpoxVqFDBateubRkZGb5uVomxYsUK69Gjh3377bdmZrZp0ya79tprrXPnzvbmm2+amdnevXutU6dO1r17d7fZVSi8/GZk7tmzxx5++GELCgqyKVOmuD2WkpJiCxYs4HjjJZdddpkzC/aqq66yKlWqcMw5Azm3+UOHDjn/T0tLs5tuuskuueQStxmCu3fvtrvvvttmzZplpUqVso8//tir7S0JxowZY5s2bXL+zsjIsAcffNDuu+8+MzPbtm2b1a5d22644Qbr1auXRUVFuc2YyjlrGYWTc3tfuXKl1ahRw+n3bLt377apU6faVVddZS6Xy1wul33wwQfebmqJlT3z5scff7SgoCC76aab7MCBA5aVlWUJCQn2559/ui3ftm1bu//++33R1BJjz549du2111rr1q3t+uuvt8DAQFuzZo3z+LFjxyw+Pt4efPBBa9OmjUVHR5vL5bLPP//czIxzSw959dVXnf//97//tXvvvdeqVavmdl559OhRi42N5WqfIoyi1L/crFmzLDIy0jmILlq0yFwul3322WdmdvyA+e6771rz5s2tbdu2zocUpp2efTt27LDo6Ghr3bq188GcD+hn7s0337RLL73ULrzwQtu9e7cTX7Vqld18881WvXp1q1q1qp133nnWuHFjZ5vn5OH05PywMnv2bBs/frwNGTLE1q9fbxkZGZaSkmKPPvqohYSE5CpMZWO7P3uyj+WXX365TZo0yXr27Gn16tVztnv6vvByHiueeeYZ69atm3Xr1s2WLl1qWVlZtmzZMrvmmmvsvPPOs2nTptnChQutQ4cO1q5dO9u3b5+dc845nDgX0qFDh6xt27a2fv16t/iOHTvshx9+sNTUVLvooovstttuMzOz//znP1a2bFkLDQ211157zRdNLjFybu8vvfSS9evXz6pWrWplypTJVZjKtnDhQrvhhhusU6dOlpCQ4K2mlkjZ/Z+VleX8f/ny5RYYGGg33nij26V8ycnJtnz5cuvQoYM1atSI4/tpGj16tNPX69ats3POOcdcLpc9//zzzjIn9+1///tf++STT6xGjRrOcQhn7q+//rJKlSpZo0aNnNi6detswIABFhISYrfddpuNGDHCrr/+eqtXrx7bfBFGUepfbtSoUc5JwzvvvGNhYWE2depUMzt+kpeSkmIJCQk2d+5c58MLO/TZl5qaatdee63VqlWLgpSHffrpp9a0aVMrU6aMzZ071+2xvXv32rp162zy5Mk2f/58tnkPGjx4sFWsWNHat29vMTExFhUVZRMmTLCkpCQ7ePCgPfbYY1a+fHmb
MGGCr5taYhSkkJq9zOWXX24ul8vOP/98ClJnIGcR9plnnrHQ0FB76KGHrEGDBhYbG2vTp0+3rKwsW716tQ0cONCCg4Od+xxl9/dFF11kM2bM8FUKxVZ233/33Xe2YcMGt8e++eYba9q0qTNbZPXq1daxY0d77LHHcs0gwekZPXq0hYWF2dy5c+3TTz+122+/3WJjY+2uu+5ylsm+v4uZ2Ycffmg1atTINVYouJzHm5O/NP7hhx8sMDDQevbsadu2bTOz48XAG2+80bp06cKXzKdpwYIFduONNzrb8vbt250vFS6//HK32X/Hjh3L9T68ePFiq1Spkv32229ebXdJcXJ/ZmZm2rJly6xhw4bWpEkTJ75+/Xrr37+/VaxY0S644AKbN2+e8x7LNl80UZT6l8p+I7vpppvsgQcesF9++cWCg4OdglRWVpaNHz/eJk2a5PY8dmTv+Pvvv+2FF16gIHWG8rts7IcffrDmzZtbp06d7D//+Y8Tz+tDPNv8mVu0aJFVrVrVfv31V6c/H3zwQWvcuLFzzNm5c6cNHDjQ2rdvz6y0M/DII4/YggUL3L49L4h7773XmjRpwjHHQ9avX2+33nqr2/GlX79+1qhRI5s6daqlpaWZmdmuXbvcLhsbOnSo1axZkx+2OE1Hjx61pk2bWo0aNWzjxo1O/Msvv7TQ0FBbtGiRmZmNGDHCevbsySV7HrJ//35r2bKl22zXhIQEGz9+vEVHR9sDDzzgxHNeFlyvXj17//33vdnUEiPn+c3EiROtZ8+e1r59exszZoxt3rzZzE4Upnr16mX79u1zCuLZz+U4X3hHjhxx+m/RokXOe+yKFSusW7dudumll+b6wjPncWbbtm3WsGFDW7lypdfaXNJlz0CuX7++W2Fq3bp11q9fP+vTp48zTpzTF10Upf4l8vtw/tlnnznXOM+cOdOJp6am2pVXXmlDhgzxUguRH04aTk/Obf7999+3SZMm2cMPP+xMZf/+++8tLi7Ounbtat98842zLAWRMzN69Ghbt26dW2zWrFnWsGFDO3DggNv2fPfdd1udOnWcDynZ978wYxxOR2JiotWuXdvatGljn3/+eaH68q+//nL2Ge4lVTjjxo1zm23z9ttvW3R0tJ1zzjm2atUqt2VvvfVWa9y4sU2ePNntg8qyZcvs3nvvtcqVK+d6Dv5Z9vadkpJiZsePI3FxcXbuuec6s3C2bNli3bt3t8jISGvWrJmFhITYr7/+6rM2lzQZGRl2/vnn24MPPugWP3TokF1xxRXm7+9vAwYMcHts2rRpFhISYlu2bPFmU0ucYcOGWXh4uI0cOdK6du1qLVu2tPPPP9+5lHXp0qVWrlw569Chgx04cMB5Xn6fC5C/nO+lK1eutKioKLv55pudvly6dKndcMMNdtlll9mcOXPM7PivOz/zzDPO82bMmGEul8uZvYZTy77/a7YpU6ZYmzZt3GLZhamaNWtay5YtnfjmzZud8WGbL9r4mZF/gZy/svf1119rzpw52rVrlyTpwgsvVMeOHXXOOedIkjIyMvT777+re/fu2rt3r8aOHeuzdhd3OX8V4kyel/3rWCic7G3+oYce0tChQ/X111/rl19+Ud26dbVgwQK1bt1aTz/9tPbu3aspU6boiy++kCTn1/lQeN9++63++OMP53iS7dChQ0pKSlLZsmVVqlQpHTlyRJI0cuRI7du3z/mVlPDwcLlcLrdfSUTB7N27V+XLl9eyZcuUkZGhp59+Wl988YXTl3aKX5CsVq2as89wzCm4jz/+WKtXr1bNmjWdWLdu3XTBBRcoPj5eS5cuVUZGhvPYa6+9pubNm2vMmDFavHixE4+JiVHjxo21bNkyNWnSxKs5FGfZ2/eXX36pYcOGaenSpQoPD9eiRYsUEhKia6+9Vps2bVLt2rX1yCOPaMyYMbr22mu1cuVKNW7c2NfNL5byOrc5duyYWrRoof/+97/6448/nHjZsmV18cUXq2PHjlq1apVe
eOEF57HatWvrxx9/VO3atb3S7pJo7dq1WrBggd5//32NGjVK8+bN09ixY1WzZk3deuut2rVrl1q2bKnPPvtMGRkZbr/2zC9NFs7J5yXnnHOOhg0bps2bN6t3797KyspSy5YtNWjQIFWtWlVDhw7Vueeeqy1btuiBBx6QJGVmZqpWrVr67bffFB0d7atUipV3331Xl112md566y1Jx48/4eHh2rJli7p27eos53K5FBcXp9tuu03Lly9XTEyMpOPvrX5+fm6fhVFE+bQkBq8aNmyYhYWFWfXq1a1ChQo2bdo0O3LkiP3xxx925513Wvny5Z2bxbVp04brzc9Azmr8W2+9ZWPGjLGHHnrItm/f7swUyWv2Qs7YzJkznUsNcHpmz55tkZGRtnr1ajM7fi2/y+WyefPmOct8/fXXVq9ePRsxYoSPWlmyZG/7CxYssKVLl5rZ8Zur1qhRw66//nq3ZdeuXZvnbBIUzoMPPmi33nqrc8nG7t277eKLL7ZLLrnEPvvss3+cMZUz9u677zJ75DRkb/Mff/yxc0lGenq6XXnllda4cWObO3durtlnY8aMcd5bmR14ZubPn29BQUE2ZswYW7FihRPfv3+/NW/e3OrXr+/2q3w4fTnPbTZt2mR//vmn/f3332Zm9ttvv1mVKlWsV69eznvu4cOHrWvXrjZlyhS78cYb7corr3S7rxTOzNKlSy04ONjtuJ2VlWWffPKJNWrUKNcMEzNmi5yOnH2WmZlpR44cMbPj5zZTp061Jk2aWK9evZzl1q1bZx9++KFNmjSJS+I94NFHH7XAwEB74403zOz4r9guWLDAateubddcc43bsm+++abdcsstdtttt/H5tZihKFWC5TzR3bRpk7Vq1cqWLl1qqamp9uCDD1pUVJQ988wzduTIEcvIyLBNmzbZ/PnzbcWKFVxv7iHDhg2zypUrW7du3axBgwbWuHFje++99+zw4cNm5v4hJOf/p0+fbmXLlqUodYaeffZZu/fee83MbM6cORYSEmLTpk0zs+OXOmWfWPzyyy+8eZ2hnB+6t27darVr17abbrrJfvrpJzM7fqlwlSpVrEOHDvbdd9/Zt99+a126dLEWLVpwknyGHn/8cWvatKkNHjy4UIWpk485ZcqUsS+++MK7jS/Gcr4/rl271qKjo+3WW291bmCblpZmHTp0sKZNm+ZZmDLjS58ztXnzZqtbt26uX+7MPqbs37/f4uLirEqVKs6+gdOT83jxyCOPWL169SwqKsoqVark3H901apVVqtWLWvevLldfPHF1qxZM6tfv76ZHf9lvoYNGzqXWaJwcvZ/9va9efNmO//88+311193Ox5lZGRYtWrV7LnnnvN6O0uy8ePH2/XXX29XX321ff3112Z2/BLV6dOnO4WpvI7pHOdPT85t/uGHH7ZSpUrZrFmzzOz4++v8+fOtTp061qVLF0tKSrK9e/da9+7d7emnn3aeR98XHxSlSqicH/IOHjxo27Zts4EDB7rtnMOHD7eoqCh79tlnbc+ePf+4DhTelClTLCoqyvnG8KuvvjKXy2WNGjWy2bNnOze6zfkzvmbHPxyGhoa6zebB6XnggQesW7du9tVXX1lISIhzU20zs+eee84eeOABt32CN6/Tk5SU5Pz/vffes7S0NJs3b561aNHC7VvzZcuWWZMmTaxatWp2zjnn2GWXXcaMzDOQ87jxzDPPWJMmTWzQoEEFKkxxzDkzOd8f//vf/5qZ2SuvvGIXXXSR3XHHHbZmzRozO37i3LFjR7vooovsrbfe4oseD/vuu++sTp06br/gdvKMs/3799vll19OUeoM5OzT8ePHW8WKFe3jjz+27777zp588kkLCQmxYcOGmZnZxo0b7ZVXXrEBAwbY008/7Rzje/fubV27dnXOfVBwJ5+PZx9HMjMzrWvXrnb++efbkiVLnMcTExPtwgsvtHfeecer7Sxpcvb7k08+aZUqVbJ77rnHOnXqZC6Xy1599VUz
O1GYuvDCC+2qq67i85MH5NWHI0aMyFWY+uSTTywmJsbKli1rMTExdt555/E+W0xRlCrhHn30UWvWrJmVL1/emjZtan/99Zfb48OHD7fatWvbyJEj7eDBgz5qZfH3wAMP2Pz5852/U1JS7KmnnnJm5cydO9fKly9v06ZNsw4dOlhUVJS98847dujQIbf1vPzyyxYaGprrlzvwz/J7A/ruu++sadOmVqpUKZs8ebITT0lJsauvvtruv/9+bzWxxPr222+tfPnydvDgQXvwwQetRo0atnPnTjMzmzdvnl144YXWs2dPt1+a+f333+3PP/9kRqYH5DxxmzBhQr6FqUsvvdTt5ufZsgtSHHMKLmefP/bYYxYdHW3x8fFmdrww1bRp01yFqWbNmlm/fv180t6SKHs7/uyzz6x69epOUSrn2Hz77bf2/fff54qj4P744w/n/5mZmZaenm7t27e3J554wm25GTNmWEBAgH3wwQe51rFu3TobOnSoVahQwZlFiILLue2+8MILduONN1q7du1szJgxlp6ebkePHrVLLrnEzjvvPLvvvvtsypQpdsUVV1ijRo14b/WQ+Ph4GzNmjFP4S0tLs9GjR5u/v7+98sorZna8MPXcc89Z3759Od6coZz99/bbb9ubb77pHPMfeeQRt8KU2fEf55o5c6a9//77zhecfNFZ/FCUKmFyfuB4//33rWLFijZt2jTr3bu3VatWzQYOHOj8+li2e++917p27cr9LE7T5s2brXPnznbeeefZp59+ambHx2HFihW2Z88e++OPPyw2NtYmTpxoZsd/sSMwMNBq1apln3/+ubOeF1980UJCQpitUAgnF/UWLlxoM2bMcE4cUlJSrH///nbuuefa2LFj7e+//7aff/7ZrrrqKmvSpMk/3t8LBbNjxw7r2LGjRUREWFhYWK5fUsouTP3f//2fLV++PNfzOXk7Pflts+PGjcuzMNWyZUs799xz7ccff3SWffHFFy0sLIxjTiHkPNEdOHCglS1b1oKDg+2tt95y4q+++qo1bdrU7rzzTudDeEZGBtv6Gcprm9+1a5eVL1/e7rnnnlyPDRo0yB566CHuYXSaBgwYYC1btnTuDWh2/B46DRs2tAkTJpiZufVt7969rU2bNnb06FG3++g8/fTTdt5553G/ujM0bNgwq1ixog0YMMD69+9vZcuWtS5duti2bdvs6NGjNmTIELviiiusRYsW1rNnT2Yhe8jHH39sLpfLoqKi3PaFY8eOOYWp7BlTR44ccY5THO/P3JAhQywqKspeeOEFt8+u2YWp7HtMnYxtvniiKFVCffLJJ/bAAw+4VZLHjh1rTZs2tQcffDBXYYobrZ6ZX375xXr37m0NGza0jz/+2MxO9OW8efOsadOmtnXrVjMz++KLL+yOO+6whx56yDlw7ty50zp37uz8hCxO7frrr7dhw4Y5l44NHz7cypYta40aNTKXy2UPPvigHTx40A4cOGD33HOPxcbGWlBQkDVt2tSuuOIKTtjO0Mn3F3G5XFapUiXnUuCc39DOmzfPLr74YrvyyiudS51w+nKe7O7bt892797t1t9jx47NVZj666+/7I477nC2982bN9vFF19s7777rncbX0Lcf//9Fh4ebj///LO1a9fO+dIh26uvvmoXXXSRde/e3e2yMT6onJ7s480PP/xgzzzzjL311lvO7Kj33nvPgoKC7Pbbb7cVK1bYqlWrbOjQoRYWFmbr16/3ZbOLtZUrV1qDBg3s2muvdfswfvvtt1udOnVs3759ZnbifoJDhgzJddNhs+PbfPayOD2rV6+26OhotxuX//bbb1azZk3r1q2bEzt69Kjb5fTMlCq8k4/R+/fvt0GDBpm/v7/zfpm9TGZmpj355JPmcrnso48+cp7DZ6kz99prr1mVKlXcvkjL6dFHH7WgoCCbPn26l1uGs4WiVAm0cuVKa9q0qVWoUMFee+01t8fGjBljTZs2taFDhzqXG2TjIFp4Od/w
f/zxR/u///s/a9iwoX3yySdOfNq0aVazZk379ttvbefOnXb11Vfb8OHDncezsrLs2LFjnLQV0rhx48zPz8+eeuop++mnn6x169b2008/2dGjR+3999+3kJAQu/fee+3gwYN29OhR279/v3355Ze2adMmLhs7QzlP2g4cOGAbNmyw77//3jp37myRkZHOL13lvHfIggULrF+/fnwoP0M5+2/06NF2ySWXWGhoqN1777324YcfOo/l/BIi5yU42Q4dOuRcZol/9vDDD9uuXbucvydNmmRlypRx7pXWs2dPe+yxx8zMnB9PMDObOnWq9enTh23eQ+bPn2/lypWzxo0bW926da158+b2888/m5nZp59+apGRkRYVFWV16tSxhg0b8queZyC7eL127VqLjY21q6++2r777jszO375devWra1NmzbOecuxY8fs8ssvt9tvv91tPZxXesbPP/9sNWrUcAqx2YXAFStWWKlSpWzBggW5nkPfn5n333/feY88cOCA3XnnnRYQEGBfffWVmZ3o32PHjuW60TxOX3a/3nnnnXbnnXe6xU5+L+3fv79deuml3m0gzhqKUiXUjBkzrEGDBtayZUtnhk62cePGWfXq1d3usQPPWLp0aa7C1JEjR+yCCy6wKlWqWI0aNeyCCy7I81eYUDDZRTyz47/m43K57K677spV8Pjggw8sJCTE7rvvvlwFWDNmLJyunP02duxYu+uuu5xv0ePj461Dhw4WGRnpdtx56aWXLDU1Nc914PQ8+uijVqlSJZszZ44tWrTIWrZsac2bN3e7jGz8+PFWvXp1e/HFF82MDymnY+PGjXbxxRe7feD46KOPbOPGjc7f/fr1c5utkJmZab/88ovbetjmz8zff/9tgwYNspkzZ5qZ2ddff2033HCD1a1b1/km/e+//3ZmSvElz5nLfp/97bffLDY21rp06eL8murnn39urVu3trCwMGvbtq1dcMEF1qBBA+fchmPN6cur7/744w8LCAiw9957z8yOj82xY8csLS3NzjvvPLcfccGZ+/vvv83lclmnTp2cLyQSExPt9ttvt8DAwFyFqWwUpjyna9eudsstt+SKp6en21dffZXrCh+OOcUfRali7p9OdGfMmGGtWrWyXr165fpQ/uabb3LZkoc8//zz1rZtW+fvnIWphQsXmtnxwtTcuXNtwYIFTr/z5nV6cr7xHDp0yBYsWGAul8tiY2PdZjOYHb/BfIUKFax37962e/dubze1RBs2bJhFRETY3Llz3S4H3r59u3OPqTfffNMuu+wya9KkCccbD/r666+tQYMGTjHwu+++s4CAAGvevLldeOGFbpcBv/XWW/T9acoupGa/z7733ntu23r2B/Bhw4ZZ+/btnXibNm3s2muv5STZQ1auXGkXXnihtWrVytauXevEf/rpJ7vhhhssJibGli1b5sMWlhz5nVOuXr3aYmNjrXPnzk7B9cCBAzZp0iR7/PHH7bnnnnO7jxROT857dCUkJJjZiTF54IEHrGbNmm73Ij18+LA1aNDAKdbi9OR1rP7999+tSpUq1qVLF7fC1B133GFly5Z1btWBs2Po0KFWtWrVXDO6d+/ebTfffLPbr03yXlsyUJQqxk6eFTJy5EibPHmyM8Xa7PjlA61bt7ZevXrZtm3bcq2DDytnJisry+bPn29hYWF2/fXXO/HswtR5553ndp15Nvr99OTc5vv3729lypQxs+PXnrtcLhs1apQlJia6PefNN9+09u3bM1PBg7799luLiYmxH374Ic/H9+3bZ7169bJGjRpZ586dnQ/vjIFn/PnnnzZu3DgzO/7rYxEREfb666/b+vXrrWrVqtakSRPnlz+zccwpnPvuu89efPFFS0lJMbPjJ8Iul8uuueaaXLOPX375Zbvooovs2LFj1rFjRzvnnHOYDetBCxcutDZt2lhwcHCuX2/7+eef7aabbrLw8HBbsWKFj1pYMuQ8Pn/zzTf27rvv2uLFi50PhatW
rXIKU/kVATnOnJ6PPvrIbTbxmDFjrG3btnbFFVfY22+/bampqbZ9+3br3bu3VahQwUaOHGkTJ060Dh06WKNGjeh3D8nux+wix++//24RERFuhamDBw/aDTfc4PZlNDwnu++PHDliTZs2tcaNG9vvv/9uu3fvtl27dlmnTp2sVatWbPMlEEWpYipnVfihhx6yatWq2ZVXXmlt27a15s2b2zvvvOM8PnXqVGvTpo1deeWVzk2IcXry+lCdkZFhn3zyiUVERLjd5HPZsmXWp08fq1SpEt/ietjGjRutX79+9s033zixyZMnm8vlsqeeesoOHjyY5/MoihTeU089leueRPPmzbOYmJg870mU81vybdu2Occqvj0/PXlts9k3s01LS7POnTvbqFGjnOU6dOhgsbGxdv/99/Pt4Rno0qWLxcbG2uuvv+7cOHjVqlUWFhZmXbt2dStMzZkzx2rVqmVt2rSxunXrOgUptnnP+fTTT61FixbWtGnTXMejpUuXWp8+fZx72eHMDB061KKjoy06Otrq169vMTExtnLlSjM7vg+ce+65dt1119nXX3/t45aWDFOnTrXatWvb888/b2Zmr7zyilWoUMGee+45u+yyy6xZs2Y2fPhwO3TokO3fv9/GjRtndevWtUsvvdR69OjBj7Z4yPjx461Xr152+PBhMzvxOWvt2rVWoUIF6969uzNTNiUlhfNJL9i8ebO1bt3aKlWq5Nz+pFmzZnzRWUJRlCrmJk+ebLVq1XKKHpMnT/7/9u48rqatjQP4b58GRFSkVObrisxD5iEkMs9TXTJLkakBmbmUSNGEIkmUWSTzPCZj5nkoQtI8nef9o0/7PUe8rwZ1dZ/vP3TO3uezWme39lrPXutZpKysTLVq1aKNGzeKxzk7O9OUKVP4D7iQHD16VO7n9PR0OnToEFWsWFFuxtSpU6doyZIl3FkoRIGBgfTHH39Qy5YtKTY2Vm5GQk6OqRUrVohT31n+hYWF0fDhw3Ndv15eXqSjoyM+2ZX9DkJDQ+nEiRNyx3O7kz+ySzkePXpE9+/flws0ff36lfT19WnVqlVElN1RHjlyJAUFBYl1zoGpvJG9VkePHk1169alTZs2ie1JZGQklS1blgYOHEhPnz4louzlk4IgUOvWrTkgVUA51+uHDx/o48ePcg/SDhw4QCYmJtS2bVu5nF5E8gnmWf75+flRxYoV6eLFi/Tp0ye6dOkSDRkyhFRVVenmzZtERHTr1i3S0NAgW1vbYi5tyZCSkkITJ04kQ0NDWrNmDVlbW8stDXN0dKSWLVuSra2tOBM8Pj5e7r7M7U3B7d+/nxQVFcnS0lIMTOXcD1xcXEgQBOrWrRt9/PhRPIf7NvmT137J3r17adu2bbR7925OgVKCcVDqN5acnEyTJk0iFxcXIspuUCtUqECOjo40bNgw0tPTk5sx9aPdC1jeREZGkiAINGXKFLnX09LSaPv27SQIAo0dOzbXeRyYKhxbtmyhdu3akZqampgnSnbwvmHDBhIEgbZu3VpcRSxRcgbZ+/fvF3e6+vTpE1WuXJlGjBghd2xCQgL16tWL1q5dW9TFLFFsbW0pKSlJ/Nne3p50dHRIU1OTqlevTuvXr6fXr19TWloaDRw4kExNTWnJkiXUvXt3MjQ0lNuumuWdbFttbm4uBqZyBoQ5galBgwbR27dviSg7IM45dQomp49y4MAB6tixI9WqVYtMTU3Jx8dHPGb//v3UvXt36tixI0VFRRVXUUssW1tbGjVqlNxrr1+/pj59+pCJiYk4a/Dp06fcpykEOW1FamoqjR07llq3bk01atSQS8MhlUppwYIF1KpVK5ozZ06uJP784CHvfnRvPHLkCJUuXZomTZokBqaIsmevjR49mnr16sX31UIkW8ff86M2htuekomDUr+R7zWEb9++padPn9LDhw+pdu3a4mAwODiYlJWVqVy5crRnzx7xeL555d23dfb582fy8vIibW1tsrKyknvv+fPn
VKNGDRIEgWbPnl2UxSyRvnfNS6VS2rdvH9WrV4/at28vPkmXDUyFhITwwLAA7OzsaP78+eLPN2/epFq1apGZmZm4jGPnzp2koaFBPXv2pPDwcNq7dy/16NGDGjZsyHVfAM+fPyd1dXVq1aqVOAOzcuXKtHfvXrp+/TpNmzaN6tWrR/PmzaO0tDQ6c+YMDRo0iFq0aEF9+vThae0FINvWy3Z6zczMcgWmbt68SRUqVKD27dvLLRfma79gDh48SCoqKrR69Wo6cuQI2djYkLKyslyg++DBg9S6dWsyMTHh3F0F8L02YsaMGVS/fv1cgz4PDw+qXbt2roAIDw7z79v6T0lJISsrK1JVVaWZM2fmGrAvWrSIatWqxTtnF5BsOx8aGkqbN2+mR48eiQ+CQkNDqUyZMjRhwgSKioqiuLg46t+/P23ZskU8j++v+RMWFibOsJ8/fz45Ojr+VF3yzO9/Bw5K/SZk/xA9PT3p3r17cu9v3bqVWrVqJT7FOnLkCA0cOJB8fHy401AAso1lSkqKOOCIj48nHx8fqlSpklxg6sOHDzR27Fg6e/Ys13sBydZ9aGgoBQQEkJ+fnzgo3L9/P7Vv3566d+/+3cAUEQ8Q8+PTp080cuRIat26tTgLkyh7hpqhoSGNHj1abH8uXLhATZo0oerVq5OBgYFcUISv//y7efMm1a9fn1q3bk1eXl5irpEcK1asoKpVq4qbKHz58oWSkpI4f1cByLY3qampcjPViIhGjRqVKzB19epV6tq1Kw9QCsmLFy+offv24qA7NjaW9PT0qEWLFlS2bFm59ujIkSPf3byF/Zxvk5rnLM07cOAANW7cmHx8fMQk/0REJ06coAYNGuTayZnlj2z9+/r6ijs1p6Wl0eTJk6l58+a0Zs2aXMtSN23axPfWQpKzg7COjg5Vr16dli9fLs6+P3nyJKmpqZGenh5Vq1aNGjduzPfVAoqLiyN9fX2qW7cuTZkyhVRUVOR2U/0R2fGv7NJJVvJwUOo3IHvz+vDhA1WqVInatm1LDx8+FF/fvn07aWlp0eHDhyk5OZl69+5Ns2fPFv+Y+SZWMMuWLaN+/fqRkZERXbp0iYiyp53mJKTs378/+fv7U9euXalHjx5c74XI1taWdHV1ycTEhKpVq0bt2rWj0NBQIsqerdOxY0fq0aOHuDMKK7h3796RpaUldezYkVasWCG+7u/vT82aNaO//vqLbt26RUTZHYanT5/S27dvOShSQLJt/c2bN6lRo0YkCAJNnz6diOTrtU+fPtSxY8dc53GAJO9k62zVqlXUv39/ql27Nrm6uoqDdaLswFRO8vNPnz798DNY/sTFxZGtrS29fv2a3r59S/r6+jRp0iR68+YN9evXjwRBoOXLlxd3MX97326U8+eff5Kfnx8lJiZSZmYmmZmZkaGhITk5OdGLFy/o5cuXZGJiQiYmJjxToZDNmTOHdHV1admyZeIstNTUVBo/fryYY+p7+dK4b5l3OdeuVCqlFy9eUOfOnenq1auUnJxM9vb21LRpU7K3txeXZD9//pwCAgIoICBAvPdyvRdMdHQ0VahQgcqUKSPu3vy/+ouy7c3atWupUaNGP9zIiP3+OCj1G3FwcBCXaSgpKVGjRo3EwFRUVBQNGjSI1NXVqVatWtSgQQNxxgJ3Igpm3bp1pKmpSfb29tSlSxdSUVEhX19fysrKorS0NAoLCyN9fX1q3rw5GRsb8/KZQuTr60s6OjrikrHNmzeTRCIRg1JE2TvB6evrk42NTXEVs8SQ7XAdO3aMhgwZQn/++Se5ubmJr/v7+1Pz5s1pzJgx392Cna/7/Hn16pX4/71791JqairduHGD2rRpQ3Xr1qX3798T0X/bc0dHR+rRo0exlLWk+PZadXBwIE1NTVqzZg05OzuLS1ZzOs9ERH/99RepqanRwYMHiYjvr4UtZ3bO/PnzqV+/fmKCeXt7e6pZsybVrFmTYmNjud4LgbOzM1WuXJnOnj0rNzMw
MzOTJk6cSM2aNSOJREKNGjXiHa9+gY0bN1KlSpUoIiJCvPfm/JuamkoTJkyg1q1b0+LFi3PNAmd5I3vNfvz4kV6/fk1jxoyRC/gtXLiQmjZtSg4ODt/dWZgDUgX3+PFjql69OtWuXZsaN24stu/ftilSqVSujffy8iINDQ25PMms5OGg1G/C3d2dKlSoQJcvX6bHjx/TpUuXqEmTJlSvXj1xF5r79+/ToUOHyM/Pj3cnKIBvG8fVq1fL5eWaOXMmKSkp0aZNm8SOQnp6OsXExPBMkUJmb28vJpQPCgqiChUqkIeHBxFlD15yltGcOHGCOwyFaNasWWRsbExdunQhDQ0NqlmzJjk5OYnvb9u2jQwNDalfv368DXshOH36NHXu3JmOHTtGNjY2JAiC+LQ2MjKS6tevT82aNaNnz57Rly9fKDU1ldq3b0/Dhg0r5pKXHHv37qU//vhDTOZ/+fJlEgSBateuTYMHD6bLly+Lxy5evJjbmwLKuVc+efKELl68SF++fJFL0N+7d2+563v69Onk5uYmpihgeZPTnhBl131SUhJ17dpVbkkk0X+XwEulUoqOjqZ9+/bRmTNnuE/5C0yfPp0sLS2J6L/1+u0y4sGDB9P48eM5CFtI5s+fT/Xq1aMqVaqQgYFBrhmvixYtohYtWpClpWWu91jefS+A/fXrV3rx4gU1bNiQGjZsKPbjc8guGybKDkiVL1+eQkJCfmVR2T8AB6V+E1ZWVt/dEcXAwICaNWuWa3tkIo7q54fsjf/QoUO0detWGjRokNzMHKLswFSpUqXI19eXvn79KvceP0UsuJxrd+TIkbRy5Uq6ceMGlStXjjw9PYkou47d3NzI29v7u+ex/NuxYwepqanRlStXKDk5md68eUNmZmbUokULuQGMl5cXjR07lq/3QnDv3j3q2rUr1ahRg9TU1HLlDIyMjCQDAwPS1NQU83o1bNiQZ8Pm06BBg2jGjBniz1KplM6cOUOrV68mouxE2mpqarR161bav38/lSpVioYPH07Hjx+X+xxubwomJCSEtLW1SVtbm2rWrEmbNm0Sc4Y4OzuTnp4ezZs3jyZPnkyVKlWiJ0+eFHOJf099+/YVr+0ccXFxVKNGDTF5s+y1nJyc/N2dDfl6z79v22ipVEpGRkbUt2/fXMekpaXRjRs3xP9zkuf8k62zffv2UaVKlWjLli00fvx4qlWrFo0aNUouYEuU3b8fM2YM13cByfYN9+/fTz4+PnT69GnxtaioKGrUqBE1adKEPnz4QFlZWTR69GhauXKleIy3tzcHpP5FOCj1mxg5ciQ1b95c/DnnqYqHhwcJgkCGhobi8g5+kpU/3+ZZKFOmDNWvX58EQaApU6aIybRzzJkzhwRBoEOHDhV1UUucHwU2AgICqHTp0iQIAgUGBoqvJyYmUvfu3cnBwaGoivivsXz5cmrRooXcd/L8+XPq0aMH6enpyS3ly8GBqfzLqbuFCxeSsrIytW7dmo4cOZLruMjISOrSpQsJgkBRUVHiedze501aWhodO3Ys13KYjx8/0ocPH+jz58/UoUMHWrVqFRFl3xfq1atHlStXpsWLFxdHkUuMrKws8T776NEjatKkCbm5uVFUVBRZWFhQ/fr1ycnJieLj4+nt27c0Z84cMjAwoI4dO1JkZGTxFv43tmfPHvF6l52V0LJlS7mgSE6bcuvWLVqwYAG9fv26SMtZUn17L83ZfczJyYmaN28uN1AnInr69Cn17NlTbnYm32MLJigoiBYsWECbNm0SX3Nzc6P27dvT6NGjc+Uklc0/xQrGzs6OVFRUqGHDhiQIAs2ZM0dcHhkVFUVNmzYldXV1MjQ0pNq1a4t9moCAABIEQW6lCivZOCj1D5fTIJ45c4Zq1KghF0Emyl5yMGXKFGrSpAl16dKlOIpY4ly6dIlMTU3p/PnzlJiYSI6OjqSnp0fOzs5i4C+Hu7s7DwoLSLazdeHCBTp06BA9fPiQvn79SlKplCZOnEhV
qlShI0eOUFJSEj169Ih69OhBzZo147ovRDlPwTdu3EiNGzcWByQ538/p06epfPnyck/XibjTll859ZZTv4cPH6YjR45Qz549qVu3brR79+5cx1+7do1GjBghflc8UCkYNzc3MjIyknvt1atXVKdOHQoODiai7MSsY8eOpR07dnB959O3wY2rV6+Ss7MzTZ48Wa5ObWxsqH79+uTs7CwO3BMTE3PNRmY/59u22dXVlSZNmiTmIt2zZw9Vr16dpk6dKh6fkpJCPXr0IFNTU27bC4Hs9b1gwQIyNjYWc9TdvHmTDAwMaOjQoWKOuhcvXlCfPn2offv2PDOtkNy7d0/cwdPLy0vuvZzAlIWFRa52iq//grt16xZ16NCBrly5QhkZGbRr1y5SVVUlKysrsb6TkpLo77//JmdnZ7FPL5VKKSIigg4fPlycxWdFjINSv4nY2FiaPXs2tW7dmhYuXEhpaWn06tUr6t27Nzk6OtLhw4dJU1OTrl27VtxF/e3Idhq2b99OAwYMoCFDhsi9Pn/+fKpatSqtWrUqV2CKiGcrFIaZM2eSlpYWqaurU926dcUd9V69ekUWFhakqKhIVatWpUaNGlGHDh3EpUvcccuf7yWWJCK6c+cOlStXjmxtbSk5OVl8/+TJk9SrVy9ydXXlwXkBydZfbGwsZWRkiHV98+ZNMjY2pm7dutHevXvF43x8fORm9/B1n3ey9S6VSikwMJB0dXVp0KBB4ut37tyhRo0a0bRp02jHjh1kampKXbp04R1V88nR0ZEmTpxIKSkpYv3n7KTXokULMfiUw8bGhho1akSLFi3i7b8L6Nt22tXVlSpXrkxz5syh169fU0pKCrm7u5Oenh41btyYTE1NydDQkJcG/wL29vakra1NwcHBcn3IK1euUOfOnalGjRpUuXLlXEnlub3Ju+8tlQwMDKRmzZpRw4YNc616WL9+Pf3555+0bNmyoixmiff333/TX3/9RRYWFnJtUXBwMKmqqpK1tTU9f/4813l8zf97cVDqN/Lq1StasGAB6enpkZqaGlWvXp0aNmxIRNk3tho1aohPwNjPkb15RUVFka2tLVWpUoVq1aoltxsWUXbnumbNmjR//nxxxwiWf7J1f/jwYapfvz6dOXOG3rx5Q9u3b6euXbtS06ZNxQ7E1atXac+ePXThwgVeulRAsh0EHx8fmj17NvXr109cNnbgwAGSSCRkbW1NYWFh9ODBA+rZsydNnTqVB+cFJHvdL1++nNq3b0+NGzemrl27ikuU7t69SyYmJmRkZESLFi2i3r17U6VKlTgYWACydXfjxg1xB7c9e/ZQjRo1aMCAAeL77u7u1KhRI/rzzz+pU6dOPEDPJxsbG6pQoQLduXOHiOSXjo0bN460tLRo48aNuQJT48ePpzZt2nBQqpD4+/uLAW0/Pz/S0dEhGxsbio6OJqLs9sbKyopmzJhBK1asEO+rfH8tHBcuXKCqVauKM6RSU1Pp9evXdOTIEYqNjaWkpCS6fv06ubu706FDhzipfAF8myg+Z1dJqVRKu3btorZt25KJiUmuh8vBwcHcpylkzs7OJAgC1a9fP9fyyJCQEFJXV6e//vor13vs34uDUsVs586dedrhITk5mWJjY2nbtm0UFhYmNqKzZs2iVq1aUWxs7K8qaokje/OytramJk2a0JcvX8jd3Z3q1KlD1tbW9OLFC7lzpk2bRgMGDODBSSEKCgqi6dOn07Rp0+ReP3v2LLVv356srKy+ux0yD9ALbs6cOaSjo0NTp04lS0tLEgSBFi1aRETZgam6deuSjo4OVa9enZo1a8aD80Lk6OhIFStWpI0bN9KKFSuoT58+pKKiIk5Xv3PnDllYWFCHDh3I1NSUt2MvANnr1cHBgQwNDSkwMJDS09MpOTmZQkJCqEaNGnL5dZ49e0Zv3rzhAHg+bd++nSpXrky3b98mouyHChYWFnTu3DnxmMGDB5OBgQH5+/vLzcokolyzGVj+REZGkqam
plxfZvPmzaSjo0MzZsygZ8+effc8HqAXnlOnTlHjxo3p2bNndPXqVZo9ezbVqVOHqlSpQm3btqXr16/nOofrv2CWLl1KRkZG1KtXL9q+fTsRZd8HduzYQR06dPhuYIqI6z2/ftQv2bhxo9iv/HaXPX9/fzI2NuY+DRNxUKoYhYeHkyAItHjx4lx/rD/y7WDw5s2bZG1tTRUqVKCbN2/+glKWfJ8/f6aRI0fK7azk5ORETZs2pRkzZtDLly/ljucEiAUjm0snIyODWrRoQYIgfDcnmp2dHTVu3JhSUlKKupglXlhYGFWrVk3c5SciIoIEQaAdO3aIx0RHR1NUVBTPTitkMTEx1LRpU7GzTESUkpJCU6ZMIRUVFXGgmJiYSPHx8eLfDNd9wSxZsoQ0NTUpPDxc7p6bkpJCISEhVLNmTbkZUzm405x3Tk5OpK+vT0RER44cocaNG1OjRo1ozJgxcgmcBw0aRAYGBhQQECDOamD5922/5NWrV6SiopJr84TNmzeTnp4ezZ49m+7fv1+URfzXefr0KZUrV47atWtHZcuWpQkTJtDOnTvp7NmzVLNmTTGfFCscbm5uVKVKFXJwcKBhw4aRgoICOTk5EVH230dQUBB16tSJmjdvzqseCoHs/fH8+fMUGhoqlwvK1dWVBEGgZcuW0ZcvX/7vZ7B/Lw5KFbONGzeSRCKhhQsX/vSMqW+3ODU3Nxenx7O82bBhA2lpaVHbtm1zzYpatWoVNWvWjGbPnp3raSIHpAouZ+lAcnIyDRw4kLS1tWnr1q1yT8x3795NBgYGPL23EHx70w8MDKRevXqJ/y9Xrhx5eHgQEdGXL1++O1Dhp4iFI2eQcvToUSL6765knz9/ppYtW9KiRYtIKpXKfWfcacs/qVRK7969o+bNm8sFAon+e02npqbSnj17qHTp0ryrZyG4evUq1a1bl4yMjEgikdCJEydoz5491KJFCzI3N5cLTA0bNoyqVKlCO3fuLMYSl1ytWrWigIAAIiK5++vmzZtJIpF8d0dVVjhy2u379++Tq6srHT58WFyumpGRQc2bN+ft7gvo23vj+vXraf/+/URE9PXrV1q7di1JJBJxoyipVEq+vr40ZcoUvq8WIltbW9LX16c6depQ27ZtqUGDBpSQkEBE2d+JIAi0YsUKDgSyH+KgVDHJWYpBROTt7S1G8v9fDgXZYEjODIfU1NRfU8h/gYsXL1LLli1JVVWVHj16REQkt1TM2dmZdHV1udNWyPz9/cnU1JSuXr1KRNkdZWNjY3GL8OjoaHr58iUZGRmRsbExBwEL0apVq+jVq1e0detWatq0KR04cIDKly8vBqSIspfe/PXXX3laWsy+70fXbqdOncjc3FwcoOQEoTp37kw2NjZFWcR/hVevXpGWlpY4I1Z2MJKSkkLv37+nrKwsOn36NAdfC0nOkuBWrVqJrwUEBHw3MPXXX3/R06dPi6OYJY6Liwvp6+tT3759afXq1VSzZk0aM2YMpaWl5eovHjx4kK/3X+zbwEdKSgp9+vSJevToQS1btuT6LwDZ++vevXspMDCQ2rRpIzfjOyUlhVxdXXPNmMrBgam8+7Zf4+bmRpUqVaIrV64QUXYbJAgChYaGyh0jCAJt3bq1SMvKfh8SsCJHRFBSUgIArFq1CikpKVBWVoajoyM8PDwQFxf3w/MEQQAAeHl5oW/fvrh79y5KlSpVZGX/nUml0lyvtWzZEp6entDW1oa5uTnS09OhrKyMjIwMAMDs2bPh5OQES0vLoi5uiZaZmYnPnz9j3bp1uH79OsqUKYN9+/ZBS0sLs2fPRtu2bTFjxgyUKVMGBw8ehCAI3/3+2P9HROL/N2/ejMWLF+Pdu3fo1q0bypYti379+mHRokWYMmUKACAlJQVBQUEQBAHq6urFVewSQSqVim12SkqKXNs+aNAgPHz4EK6urmLbnpmZiczMTFSsWLG4ilwi5Fzzste+iooKMjIycO3aNQCARCIR25Rbt25h
586dSE1NRadOnaCgoICsrKyiL3gJkpKSggcPHmDcuHFISEjAiBEjAACjRo3CjBkzcP/+fXh7e+P8+fMAgK1bt6JWrVrFWeTfHhEhLS0NdevWxYgRI1C9enUcP34cRIStW7eiUaNG6NKlCyZNmoRJkyYhNjYWvXv35uv9F5NIsodaRISsrCy4uLjA1NQUX758wYULF7j+80l2TDR37lwMHToUq1evxpUrV3D27FmkpaUBAEqXLo1JkybB1dUVdnZ2CAgIEM8D/vv9sJ/z8uVLCIIgXrNEhLt372LBggUwNDTE/v37sWjRInh7e8PU1BRfv34FEcHa2hq7du3CyJEji/k3YP9YxRMLY0TZifjU1dXp4MGDFBISQvPmzRMTwn07vVE2Ku3l5UWqqqo85TcPZJ9EXb58mcLCwuj27dvi1NKIiAiqWbMmtW/fXpzF9m1ybX6alT8/egq1Y8cOat++PQ0fPpyuXbtGRNkzpvr3709Vq1YlX19fMZfU9xKds7w5efIkTZ8+nYKCgogo+3vx9PSkpk2b0qBBgygiIoL27dtHPXr0oIYNG4r5i3iWWv7IXvfLly+nrl27UpUqVWjcuHF0/PhxkkqlNHfuXGrcuDG1bNmSpk+fTm3atKH69etz7qgCkK33mJgY+vLli7gByOLFi0lbW5v8/PzEY9LT06l79+5kZmbG13ohy8kRtXnzZqpbty6NGDFCfG/Hjh1Uu3Ztmjx5MqWkpHDd59P/m+WRmppKXl5eZGJiQgEBAbR+/XoaMmQIDR48mPs0xSQqKoqcnJx4l71Ccu/ePerUqRNdv36dXrx4Qb6+vqSgoEALFiyQq9vk5GTatWsX13cBrFy5kgRBoFu3bhHRf9sfExMTWrt2LR0+fFguFURmZia5urqSt7e33Ofwd8C+h4NSRSjnjzcrK4uSkpKoXbt2tHTpUrljcqY3Ll++XOxIfxuQKl++PO3evbvoCv4b+3Y5pK2tLeno6FCNGjVIWVmZhg0bRidOnCCi7MBU7dq1qWPHjnLLK1nhCA8PpydPnsi9tn37dmrfvj0NGzZMTNSfnJxMRkZG1KJFC9q3bx8nOS8EJ0+epAYNGpCmpqZcwtvU1FTy9vamTp06UZkyZahly5Y0YMAA8frnQUvefZusOWeXvbVr15Kbmxu1atWK2rdvT4GBgUSUvcvhuHHjaPDgwTR9+nSxs8Z1n3ey98ply5ZRp06dyMDAgLp06ULnzp2jz58/0/Tp06l8+fJkbm5OkyZNoo4dO1KDBg14Z8lfKCEhgXx9fUlfX18uMBUcHPzD3d/Y/ycbkNq8eTNNnjyZpk+fThs3bpQ7LjQ0lMqWLUuvX7/OdR63M/mXn7bi23O4vSmYFStWkKmpKQ0dOlTu4aW/v/93A1M5OCiSP9evX6cBAwaQnp6e2GfPysqihQsXUrt27XKlgnj//j2ZmpqSi4tLcRWZ/UY4KFVEZG88Dx8+JCIiAwMDMfFeenq62FEYMmQIqaqqkr29PcXHx4vnrV+/ntTU1HiG1E9q2LAh2dvbiz97enqSpqYmnTlzhuLi4ujQoUPUvXt3MjU1pYsXLxJRdoNbrlw5mjJlSnEVu8SQ7fhGRkZS1apVycrKip4/fy53nJ+fH6mqqtKIESPE7yE5OZl69uxJtWvX5p1pCkFcXBzZ29tTpUqVaPjw4XIdspy26f79+/Tlyxfe6a0AmjZtSuvWrRN/fvr0KTVs2FBMukpE9OzZM7KwsKB27dpRVFSU+LrsPYLrvmDmz59PFStWpL1799Lp06epffv2pKKiQvHx8RQTE0M7d+6krl270pAhQ8jGxkasb673XycxMZF8fX2pQYMG4gYLrHDMmTOHNDU1acSIEWRqakpKSko0duxYMeD04sULqlGjBt29e1fuPA6I5J9s/+bt27cUFxcn7iz2v+pV9rx3797xd1BAwcHBJAgC6enpiXlhc/j7+5OysjJNnz6dg6+FKCoqiszMzEhHR0fsw9y/f5/+
+OMPql+/PkVERFBaWhq9evWKevbsSa1ateJ7K/spHJQqArI3HSsrKypfvjwREdnY2JCenh69fPmSiP77xGrGjBnUvHlzateunXjuqVOnqFKlSrw7zU9avHgxNWrUSK4DMHnyZDI3N5c77vTp09S8eXOaPXs2EWV3GB48eMA3sAKSrff9+/dTXFwcrVu3jlq0aEHTpk3LFZhq3Lgx6enp0aJFi8SbV3JyMg0YMICfpOfRt8s5ctqQ+Ph4mj9/PjVu3Jjs7e1zLR3gxJ8FY2trSwYGBnKzLGNiYqhatWpiu51Tr69fvyZtbW1ydXUtlrKWZO/evaN27dpReHg4EWUnclZTU6MNGzYQ0X+v82/beG7zf73ExETy8PAgQ0NDevPmTXEX57f17Rbs2tradObMGSLKbs/Dw8NJTU2NLC0txeO0tLRyLaFh+fPt0uy2bdtS3bp1acCAAeLmLd+7h8reY93d3WngwIH/d3Mj9l8/6pccPnyYBEEgS0tLiomJkXvPy8uLOnTowMG/ApKt+8DAQJo/fz4JgkDVq1cXZ0xFRkZStWrVqFGjRqSjo0Nt2rQhQ0NDnnnPfhoHpYrQ48ePydzcnE6dOkVERHfu3CFjY2Nq06aNGJhKT0+nvn370vnz5+Ua0Vu3btH169eLo9i/pZkzZ1KzZs2IiGjWrFnk6upKkyZNogEDBhCRfAO7evVq0tDQyJXHixvQ/JG9bh0cHEhLS4s8PT2JKHtHjiZNmtD06dPFwFR0dDSNHz+etmzZIn4vvHwyf2Tr3tPTkywtLcna2poOHz5MRNnLaObOnUuGhobk4OAgXuPcYSu4ESNGUP/+/YmIyN7enkJCQujjx4/0xx9/0Lx584gou03JucZ79epF06ZNK7byllSPHj0idXV1iomJEfNb5LQ/SUlJ5OLiQq9evSrmUv57JSUliTNKWN59O6Py4MGDVLNmTXEXzxwhISGkrq5Op0+fpsTERFq0aBH3aQqBbP3PmzePNDU1aefOnbRnzx7q0qUL6ejo0Pnz54lIvp8pe563tzeVK1dOzO/I/j/Zurxz5w6dO3eOXr58KS6XDwkJIUEQyNramt6/f//dz+B+TsHNmjWLqlevTqtWraIpU6ZQgwYNqEqVKhQREUFERM+fP6fQ0FByc3Ojo0ePct40licclCoi27dvp7p161Lr1q3lnowcPXqUTExMqGzZstSlSxeqV68e1a1bV/wD5hkLeZNz0zl37hzVq1ePGjVqROXLl6fo6Gjy8fEhRUVFscOQY+fOndS6dWv6+vVrcRS5xFqyZAlVqlSJrl69SnFxceLrHh4e1KZNGzI1NaXVq1dT9+7dqXv37uJ3x9d8/sjWm729PVWoUIH69OlD3bp1I0EQaO7cuURE9PXrV3JwcKA2bdrQ1KlTub4LKOe63b17N+nq6lL79u2pXLlydP/+fSIi2rp1K0kkErk8L6mpqdSsWTP6+++/i6XMJcX3ZvfFx8dT//79afbs2aSqqio3OyQqKor69etHx48fL/KyMlZQJ0+epICAACIimjRpEs2YMYMiIyOpbNmydPToUbljHz16RNra2rmWv3NgKn9evHgh9/OxY8eoadOmYsqBw4cPk6qqKjVv3pw0NDTowoULRJTdLsneYzkvbN7JtvN2dnZUp04dUlVVpQYNGlD//v3pw4cPRJQdmJJIJGRjY0Pv3r374Wew/Hn48CHVqlWLDh06JL528eJF6tWrF+no6NDt27eJKHddc5vDfhYHpYrIpk2bqHXr1qShoUGfPn2Sey82Npa8vb1p3rx5tHTpUk50W0hMTExIEATq2bOn+Nrw4cNJQ0ODDh8+TC9evKC4uDgyNjam3r17802rEH369Im6desmdqDfvHlDJ0+epIkTJ9KuXbto+fLlNGrUKLFTwUmGC09UVBSNHTuWrly5Ir62bds2UlBQoBUrVhBRdo4pS0tLmjBhAtd5IWrXrh0JgkATJkwQX8vMzBR3rBkyZAiNHTuW
unTpQgYGBvz0sABkB3ppaWlyGyJMnTqVBEGQm4mWkJBApqam1L17dw7Est+KVCqlr1+/krGxMXXq1In69OlD5cuXp5s3b1J8fDz17duXBg4cKAZCiLI3eWnQoIGYz47b+fzr06cPjRs3Tu61mzdvkp2dHRERHTlyhDQ1NcnT05Pu3LlDtWrVIi0trVzB75yAFOeFzZ9169aRhoYGHT9+nO7fv08+Pj7UoUMHatWqlbgx1L59+0gQBFq7dm3xFrYE+LbNiIyMpFKlStHZs2flXj9+/DhpaGhQ7dq16caNG0VZRFbCcFDqF/jezT8zM5OCgoKobt26ZGRkJAamftRR4MFKwXz69Il69+5NS5Ysofr164s7/mRlZZGFhQWVK1eOqlatSvXr16cmTZpwUKSQff78mXR0dGjevHl05swZGjZsGBkaGlKLFi1IW1ubNmzYQJmZmfTp0ydOrF2IgoODqWrVqvTnn3/Ss2fPSCqVivXr6elJKioq4vr/pKQk8T2+7gsuMjKSOnToQDY2NlS9enVxZlqOI0eOkJmZGQ0aNIisra354UMhWb58OXXu3JlatmxJNjY24kzkAQMGULVq1cjMzIxsbGyoY8eO1LBhQ7Gt58AU+918+vSJ6tatS4IgiJvkEGXnbTQyMqJOnTrRunXraP/+/WRsbEzNmjXj9qUQfPjwQdzZLSf4QZT9fWRlZVGfPn3k2vuePXtStWrVqEePHuJrmzdvprJly/IMqXxKTU2l4cOHi0vhibLb8GPHjlGrVq3I1tZWvKeeO3eO+5O/QGJiInXo0IHmzp0rt1w4MzOTOnbsSJqamtS7d+9iLCH73UnACpVUKoUgCACAFy9eIDo6GjExMVBQUMDgwYPh6OiItLQ0/PXXX4iLi4MgCMjIyMj1OYqKikVd9BJFQ0MD+/btw/z58zFr1izcuHED5ubmkEgk8PX1xZ49e7Bu3TosWbIE169fh5KSEjIzM8XvjhWMuro6lixZAg8PD/Tp0wfVq1fH8uXLce3aNXTt2hVXrlyBgoICNDQ0IAgCpFIpX/OFQFFREQYGBnj58iU+ffoEQRCQmZkJAOjevTsqVaqEd+/eAQBUVFQgCAKIiK/7QtCkSROEh4dj6dKlmDx5MrZt24b58+eL7/fo0QN+fn4ICQmBm5sbFBUVkZmZCQUFhWIs9e9HKpWK/1+5ciWcnJzQoUMH9OzZE9u3b8eAAQNw584d7NmzB5MmTUJWVhbevHmDjh074saNG2JbL5Fw94f9XiQSCWrXro0OHTrgxIkT2LZtGwCgb9++sLW1RcOGDbFgwQIsW7YMCgoKuHz5MhQUFJCVlVXMJf99ZWVlQVNTE8rKynB1dYWxsTHu3bsHILuf+eHDB9y8eRM1a9YEAHz58gXlypWDt7c3Dh8+LH6OqqoqAgMDMXDgwGL5PX43su08AJQqVQoJCQm4f/+++JpEIkG3bt3QrFkzXLp0SezHtG/fXry/soJZtWoVpk6dCgAoW7YsWrVqhSNHjmDnzp1IT08HACQkJKBixYrYsmUL9u/fX5zFZb85gYiouAtRUkilUrGju2TJEhw6dAgfPnxAvXr1MGXKFPTt2xeZmZkICgqCl5cXNDQ04Ofnh4oVKxZzyUu2pKQk7Nq1C6tWrULz5s2xffv2XMdkZWXx4PAXePXqFdLS0lCnTh0A2X8j3bt3R+vWrbFs2bJiLl3JdOLECSxYsABfvnxBUFAQGjZsCAD48OEDWrZsiXXr1qF///7FW8gSRrbtB7LresuWLVi/fj3GjBmDJUuWAAAyMzPF4CsHAwvm+vXrOHXqFAwMDGBqagoAePfuHbp3747KlSvj5MmT3z2P23r2u4uJicG4ceOQkpICCwsLmJubi+9FR0ejTJkyqFChgvhQgh/4FI43b96gSZMmaNKkCdzd3VGvXj0AwPDhw3Hnzh1YWloiJCQE6enpOHv2rBgQ5PYmb2Tvp7du3YK2tja0tLSwbNkyHDhwAG5ubmjZ
sqVYrxs3bsSWLVtw5MgRlC9fvjiLXuJs2bIFY8eOxezZs+Hk5AQAMDc3x507d1C9enUYGhriyJEjkEqlOHfuHBQUFHL1hxj7acU7UatkcnR0JE1NTdq/fz+dPHmS+vbtS6qqquK24BkZGRQQEEB//vknzZ49u5hL+++QmJhIfn5+ZGBgIJdjihWNhIQEOnfuHPXu3ZsaNmzIU6t/AdkleEePHiVjY2OqWrUqbdmyhbZu3Uq9e/emBg0a8HKOIvL+/XtatWoV1axZk3fZK2SnT58mQRBIRUVFTOScs7zm5cuXpKKiQj4+PsVZRMZ+qWfPnlGvXr3I2NiYNm/eLC6hcXBwEI/hJar596O6e/PmDWlpaVHnzp0pKiqKiIjOnz9Pw4YNo4YNG1Lfvn15iXAByPZj5s6dS82aNROXPH78+JEMDAzIyMiIwsPDKSEhgb58+UJdunShYcOGFVeRS4wfXa9BQUGkrKxMM2bMEF/z8PCg4cOHU4cOHWjkyJF8zbNCwTOlCgHJPPE+e/YsZsyYATc3N7Rr1w5hYWEYOnQomjVrhoiICGzZsgWDBg1CRkYGTp48iW7duvFTlCKSlJSELVu24MKFCwgICOBIfhEhIpw5cwYuLi7IyMjAwYMHoaSkxE8QfwHZtuj48eNYuHAhrl+/DhMTExgbG2PChAkoXbo0130R+fDhA9zd3XHv3j3s3r2bZ0bl07dPXt++fQsfHx+4uLjAzs4Ojo6OICJkZWVBIpGIy/lkl08yVtI8f/4cs2fPxv3795GWlgYVFRVERERAWVm5uIv2W5O9j+7duxcvX75Ey5Yt8ccff0BLSwtv3rxBixYtULduXWzevBl//PEHAODTp09iSgKeoVYwS5cuxfr167Ft2za0aNECGhoaALLvqb1790Zqaio+fvwIXV1dpKWlISIiAkpKSjwDuRBcv34dLVq0kHstKCgIo0ePhrW1NVavXi2+npycDBUVFQDga54VXPHFw0oG2ahwQkICffjwgebOnUtSqZSOHj1KlStXJi8vL3r27Bk1atSIVFRUaMuWLXKfwTMXik5KSor4JIYj+kUnNTWVbty4IdY5z5T6db6dMdWvXz9q06YN3b17l4iyvwtWdD5//sxtTgHI1tm+ffsoMjKSiIiio6PJwcGBJBIJeXh4iMdkZGSQvr4+/f3330VdVMaK3Lt37+jgwYO0adMm8b7K99f8k71/zpkzhypWrEh//PEH6erqkrW1Nd27d4+IiF6/fk1VqlQhIyMjun37ttxncDuff1KplKKjo6l58+bk7+8v917ObNivX7/SiRMnyNXVlQICAsQxFF/3+SM7Br1y5QoJgkCurq65jvPx8SFBEMRdnGXxZjmsMPBMqQKQfXrr4uKCJ0+ewMHBAZUrV0bp0qUxdOhQ1KxZEytXroQgCBg6dCju3bsHPT09hIWFAQBH9IsJ8dOUYsPrzfMnL/Ume30fPnwY69evR0JCAtavX4/GjRv/ymKWSD+q+//3nci+z21O3snWmYODA7Zt24YVK1agX79+qFChAqKjo7Fu3To4OTlh5MiR0NbWxtOnTxEVFYV79+7xU1v2r8OzYAvH1atXsWjRIixatAgtWrSAh4cHAgMDoa+vj9mzZ6N+/fp4+/YtqlatiqlTp8Ld3b24i1xiPH/+HIaGhti/fz/atm0rdx9NSUlBYmIiNDU15c7h6z5/Pn/+LM5Cu3btGmrWrInNmzfD0dERLi4usLa2Fo+NiopCx44d8fnzZ7i7u4sJ0BkrLDwyLICcRtLOzg6rVq1Cx44dIZVKUbp0acTHx+PGjRtQV1eHIAhISEgAkL1bUFhYGARB4AFKMeK6Lz4ckMo72U7Z+fPnERcX9z+Pz9lVDwBMTU0xc+ZMCIKA2bNnIz09Hfws4ucRkVj3OUGRLVu24NOnT5BIJD/c2Ur2vDt37nCbkw85dbZixQpx58KhQ4eiQoUKAIAqVapg3rx5sLOzw8GDB3H58mXMmjVLDEjx
rmPs34YH5vkju9vbjh07sHr1aqipqaFFixaQSCSwsrLC6NGj8eDBA6xevRpRUVHQ1dXF+/fv4erqWnwFL4EqV64MJSUlHD16FADk7rMRERHYu3cvEhMT5c7h6z7vTp06BTMzM7x79w42NjYYPHgwFBUVYWNjg+XLl2P69OlywVY1NTWMHDkS4eHhmDRpUjGWnJVU/BixgE6cOIHg4GDs3bsX7dq1E18vX748TExM4OPjg9TUVJw6dQqpqakwNTWFIAg8W4Qx9lNkgxvz58/Hjh074OzsjF69eqFUqVL/89ycdqZatWqwtrZG27ZtOd9IHsjO1JkzZw62bt0KHR0dZGRkwN/fH9u2bYOurm6up7Sy523YsAEzZsxAVFSUmHuE/byEhAScPn0a8+bNQ+vWrfHmzRs8fvwYfn5+MDAwgJmZGRwcHKCsrIx169YhKioK7du3R2ZmJg9UGGP/l+w99uHDh7h06RLOnDkDDQ0NfPr0SZyVM2nSJAiCgG3btmHevHlYu3YtatSoAYBn6hQWqVSKMmXKYNy4cdi/fz90dXUxceJEcSfDpUuXomLFipgwYUJxF/W3FxMTg9TUVBgZGeHjx4+4evUq1NTUAECcIWVjY4Nnz57B0NAQAQEBICJ07dqV86axX4KvpgJ69eoVVFRUYGBgIL6WMyAxNzeHiooKDh8+jOrVqyMwMJC3y2SM/TTZtmLBggXYtGkTduzYgcaNG+cKSH1vqZggCHB1dYWvry8OHDgAXV3dIv8dfmc5gaXnz5/j3bt3OH78OOrVq4ejR49izZo16N+/P/bt2ycXmJINSHl7e2PBggUICAjggFQ+ZWZm4sOHD3j37h2CgoKwe/dufPjwAWlpaYiKisKHDx/g4uKC8ePHIzMzEw4ODkhJScH06dOLu+iMsX842fumtbU1IiIiEBoaCj09Pfj4+MDJyQkzZ85ElSpVAAATJ05EUlISHjx4gGrVqomfwwGpwpHzXZiZmSE2NharVq3C8ePHoa2tjRs3biA+Ph6HDh0SZ4PzDOS8y+mrjBgxAmfOnMHp06fRuXNnubosXbo0Zs6cCV1dXcyaNQsnT56Empoajh8/LtY9B6RYYePISD7lLH9JSUmRWyJAROJ7MTExGDVqFC5duoRdu3ZBSUkJmZmZHJBijP1Pnp6eAP7bQXvz5g0OHToEDw8PGBkZQSqV4s6dO1i2bBlCQ0ORlJT03dxF3t7eWLx4MebOnSs+0WV5s337dvTu3Rvv379HjRo1oKSkhN69e8PBwQGqqqoYMGAA3r59CwUFBWRkZMjVva2tLXx8fDB06NBi/i1+X+rq6hg3bhwCAwMxZcoU1K1bF0uWLMHly5fRuHFjfP78GQBQtWpVWFtbw9zcHC4uLvjy5QsvU2WM/U859824uDhER0dj6dKlUFdXh62tLczNzXHy5Em4ubkhJiZGPGfGjBnw8vKCRCKRW/bHCk/dunUxb948LFu2DB8/fkR0dDRatGiByMhIcSzFAam8k0qlYgB1165d0NbWxsaNG6GkpIQZM2bg9u3bcseNHDkS9+7dQ3h4OE6fPs11z34pjo7kU84fpJGRER4/fiyuKRcEARKJBAkJCfDz88O5c+egoKDAkWXG2E/x9/dHeHi4XLA7MTERr169gpKSEk6ePAl7e3uMGTMG3t7esLe3x759+wBArrOQExTZtGkThg8fXhy/ym+PiJCcnIxy5cohKioKSkpK4nvGxsaYO3cuKlSogLZt2yI2NlZ838vLCw4ODvD19cWgQYOKq/i/vZygkrW1NU6cOIHIyEgsW7YMnTp1ApAdrFVXVxeP09bWhr29PSIiIqCmpsYdZ8bY/+Xm5oZmzZrh48eP0NfXF19fuHAh+vbti/DwcLi7u+Pt27fiezl9en7I/OtUrVoVI0aMwMmTJxEcHAxXV1coKirysrF8kr1e7e3tMW/ePGhqamLcuHEYPXo0EhMT4ejoiDt37ojHhYeHQ0VFBVpaWmLqGa57
9qvw7nuFwMfHB1ZWVpgyZQp69+4NZWVlrFixAjExMYiIiOA/YMbYT4uLi0P58uWhoKCAkydPokuXLgCA4cOH4/jx40hOTsbkyZPRvXt39OjRA4aGhjA1NcWiRYvEz8gJSHFQJG++txwgMzMTISEhWLx4MWrUqIEdO3aIeRcAIDQ0FEePHsXatWuhoKCA06dPo0uXLggODua6/0n/a5Dx7XcSHx+PO3fuYOXKlXj58iUiIyOhqKjIy+IZY/ly+vRpWFtb4+3bt7h58yaqVauG9PR0Mf/i0qVL4ePjg3nz5mHy5MnFXNrf14+W2/2vZXg5q08kEonc/1n+LV26FG5ubggNDcWff/4p9mf2798PLy8vEBEsLS3h6emJ9+/fIyIigh/wsCLBQalCQEQ4cOAApk2bhqysLKipqUFXVxeHDh2CkpISJ0BkjP0U2c7ZhQsXMGjQIIwcORJr1qwBABw9ehSamppo1qyZeE6XLl1gamqK2bNnAwD27duHESNGYPv27Rg4cGDR/xK/KdmgxsuXL1GqVCkIggAtLS1kZGQgKCgInp6eqFixIgICAsQd4L79jNTUVNy/fx/Nmzcv6l/htzN79mw4OjqiQoUKP32fPHfuHOzs7KCuro59+/bxPZYx9tO+F7zOyMjAtWvXMHz4cNSpUwcnTpwQX8+Z/err64vRo0dzO5NPsvUeExMDQRCgpKQEDQ2NXO/L4rxRhevz588YNmwYxowZg1GjRuHt27d49OgRAgMD0a1bN7x58wYXL15EREQEatWqhaNHj0JJSYm/B1YkOChViD5+/Ij4+HhIpVLUrl0bEomEp5kyxn7Kt52y9+/fw8fHB8HBwejRowecnJzE9xITE/HmzRvMnj0br169wo0bN8R2Jjo6Gg8fPkTnzp2L+lf4bcnW/dKlS3Hw4EHExsaiXr16mDp1Knr16oWMjAzs2LED3t7e0NTUhJ+fH9TV1Yu55L+vqKgo9O7dG+rq6jh16hTKly//0/fLW7duoWHDhnyPZYz9NNl2PiIiAvHx8ahWrRp0dXVRpkwZXLx4EYMHD0ajRo0QFhYGAHIzpgDeZS8/ZAMaixcvxsmTJ/HkyRO0atUKPXr0wMSJE//vef7+/oiJiYGtrW2RlbskiouLQ4MGDWBhYYHu3bvDw8MDz58/h1QqxZs3b7Bw4UIMHz4csbGxPI5lRY7nQBaiSpUqoXbt2qhTp46YAJH/kBlj/49sZzkwMBCXL1+GlpYWpkyZguHDhyM0NBT29vbi8YcPH4aFhQVSUlLEJcJZWVnIyspClSpVOCCVR7I7HLq7u2PevHnYtGkTFBQUMGrUKOzZswdKSkoYMWIEpkyZgnv37mHlypXFXOrf259//oktW7ZAUVERHTt2xNevX8V8If8LEaFx48bicg6+xzLGfkZOO29nZ4f+/ftjzJgxaNSoESZPnoyLFy+ibdu2CAkJwd27d9GrVy8AkAtIAbzLXn7kBJYWLlwINzc32NnZITg4GFlZWZg+fTqePn2a6xzZgJSXlxesrKxQv379Ii13SaSuro4lS5bAw8MDffr0QfXq1bF8+XJcu3YNXbt2xeXLl6GmpsbjWFY8iDHGWLGRSqXi/+3s7KhKlSq0YcMGiouLIyKi9+/f0/Lly0lfX5/s7e2JiCgrK4sOHTpEmZmZRESUkZFR5OUuaU6fPk3Nmzen8+fPExHRkSNHSFVVlTp06ECqqqq0d+9eIiJKS0ujI0eOiHXP8i49PV38f1hYGDVq1Ig6dOhACQkJRPTj61n2b+X8+fP08OHDX1tQxthv7fjx43LthqenJ2lqatLJkycpNjaWdu7cSV27dqW+fftSREQEERFduHCBFBQUaObMmcVV7BLn3bt31LFjRwoLCyOi7HZfVVWVNm7cSETy94SsrCzx/15eXlShQgUKCQkp2gKXcC9fvqRHjx6JP2dlZVHXrl1p3rx5xVgq9m/HQSnGGPsHWLlyJWlqatL169fFDlpO5yw+Pp5W
rFhB9evXpylTpsidx8GRwvHkyRNycHAgouwOc+XKlcnLy4uePn1KDRo0oHLlylFAQIDcOVz3eSc7QFyxYgUNGDCADAwMSBAEMjQ0pPj4eCLKHZiSPW/9+vWkqqpKN2/eLJpCM8Z+O82bN6c2bdpQVlYWZWZmklQqJTMzM5owYYLccWFhYdSkSRNydHQkoux2/fbt29y+F6K3b99S1apV6cmTJ3Tw4EEqV64ceXp6EhFRamoqbdiwge7duyd3jre3N5UvX54DUr9QQkICnTt3jnr37k0NGzbkB5ysWPHyPcYYK2ZpaWm4cuUKHBwc0Lx5c0RHR+PIkSPo168fFi5ciNevX8Pa2hp9+/ZFfHw8SCYVIC8nyDupVJrrtdq1a4v5KjZu3AgLCwtMnDgRtWrVgr6+PvT09ODv7w8AYv1z3eddzpKMNWvWYMWKFbC0tMSOHTvg6emJlJQUdO7cGQkJCeKSVEB+KYe3tzccHR2xadMmNG7cuNh+D8bYP1dQUBASExNx9uxZSCQSxMbGim1IYmIiAIjti4mJCQYMGICNGzciMTERCgoKaNiwIRQUFMRj2M/73v1VWVkZdevWhYeHB8zNzeHs7CzuZPj06VMcO3YMb968EY/38fHBtGnT4Ofnx7vY/iJEhOvXr2PVqlXIyMiQSwXBWHHghaKMMVaMiAhZWVl49OgRSpcujV27dmH79u34+vUrSpUqhd27d+Pz589wd3fH7NmzoaGhAUEQeDeUfJLN33X16lV8+vQJampqaNCgAdTU1BAbG4vIyEi0atUKgiDg69evEAQBzs7OYp4Rrve8+zaR/9WrVzFp0iR069YNAFC/fn3UqFEDU6dORc+ePXH06FGULVtWbgcsb29v2NrawtfXlwcqjLEf0tLSwrNnz3DlyhUcOnQIZ8+exYULF9CkSRPMnz8fkZGRaNq0qXh8zZo1xcTOsvjBQ97ItvMuLi74+vUr5s+fj0qVKqFDhw5YtGgRJk6cKAakEhISMGfOHKSnp6Nr164Asjd5uXTpEgIDA3kH4V9IEAS0adMGS5YsEfM0clJzVpz4ymOMsSL07eBcEASoqKjA3d0dFhYWOHnyJCZOnAhjY2N06NABc+fOxb179wAAFStWBMDbJBdETt3b29vjwIEDSElJQa1atZCYmIjQ0FBoamrCxMQEXl5eSEtLw7Fjx5CWloaePXtCEIQfbl3NfoyIxDo7dOgQunTpAgCIjIwUj1FQUICJiQn69OmDdevWoWHDhrhz5w7Kli0LIDsgZWdnxwEpxtj/1aRJE0ydOhV9+vRBZmYmoqKiAACzZs3ChQsX0KtXLwQFBaF27dooX748/P39UalSJZQpU6aYS/57y2nnbW1tERgYCBsbG8TExKBq1apYsGABPnz4gM2bNyM5ORkSiQQvXrzAp0+fcOPGDSgoKICIoKWlBRcXF2hoaBTzb1PylSpVSgzOclJzVtz46mOMsSLy7S57jx49QmZmJnr16gUjIyNERkYiOTkZurq6ALKXF9y4cQO1a9eW+xwOSBWMu7s7fH19sX//frRp0wYLFy7E0qVLce3aNfTs2RPjxo1DVlYW9u/fjxo1aiAwMBAKCgockMoH2QDqkiVLEBAQgP3798PY2Bje3t7YvXs3+vXrJ3aGDQwMMHDgQFStWhWlS5cGAFy+fBlz5szhpRyMsR+aNGkSTExMMHDgQKirqyMtLQ3x8fGoXLkynj59Cj09PQDZAe7p06ejd+/e0NDQgKqqKiQSCa5fv86zkAvB1q1bsWXLFoSHh6NJkyYAslMUCIKA9evXo0WLFjh//jxSUlLQvXt32Nraijuv5sxM44BU0eO+DStuAskmJ2GMMfbLzZ49GwEBAahfvz5SUlJw5coVrFq1CtbW1ihdujS+fv2KCxcuwMPDAy9evEBkZCQUFRW5s1wIMjMzMWHCBDRu3Bg2NjY4ePAgRo4cibVr12L8+PFIS0uDRCKBkpISEhISUK5cOQiCwNPaC+je
vXtYsGABLC0t0bVrV3z+/BnDhw+HVCqFubk5hgwZgrS0NFhYWKBRo0ZYsmSJ3Pl37txBw4YNi6n0jLF/sufPn8PX1xcLFiwQl/s6Ozvjjz/+wNGjR3H48GF4eXnB1NRUPCcsLAxfvnwBEWHo0KFQUFDgdr4QODo64u3bt/D19cW9e/dw+vRpeHh4QElJCVZWVhg/fnyuvkxWVhYvlWTsX46DUowxVoSOHDmCMWPG4MiRI2jatCkEQYCbmxtmzpwJLy8vjB8/HpGRkVi8eDGICCEhIVBSUuLOciEaMGAA+vbti8qVK2P48OFi0tWsrCxs2rQJKioqGDlypNhJ5mBgwfj4+MDLywuCIGD37t2oUaMGACAmJgZTpkzBs2fP8Pr1a+jo6EAqleL27dtiEFYqlfJghTH2Q4mJiShXrpx4j/T19YUgCLCwsACQvUzY3d0dJ06cgKenp1xgShYHRgqHk5MT7O3tsXjxYuzatQt//vknDA0NcffuXZw/fx4RERE8E4oxlguPcBhjrAh9+vQJurq6qF+/vjjgnjZtmpjws3v37mjatCnWrl2L6tWrc/LJAvjecrusrCzo6OjA1dUVr169gpOTk5h09ePHj9i7dy9MTU3lBicckCqYDh06YMOGDbh//z4uXLggBqW0tbXh7++PZ8+e4cKFC6hQoQKGDRsm7gCkoKDAg0TG2A/NnTsXJ06cQFhYGNTV1fHhwweEhobi9evXyMjIwMSJE9G0aVNMmzYNgiDA0tISnp6e6NmzZ67P4rYmb360nH3OnDmIjY3FoUOHMGHCBHTv3h36+vq4ffs2Hj16hISEBA5KMcZy4ZlSjDFWhAIDAzF27Fi8ePEC2tra4u5id+/ehYmJCXbt2oV27dqJx3Meo/yRrbdbt25BRUUFEokEtWvXRmxsLDp06IDMzEyEh4dDU1MTiYmJGDduHOLi4nDu3DkOAubTj67X58+fY8CAAVBXV8eiRYvQqVOnH34Gz1hgjP0/RARfX19s2bIFampq2Lp1KzQ0NHDv3j2sWbMG9+/fx+jRozFp0iQA2feB9evXw9/fH6dPn0abNm2K+Tf4fcm281u3bsXdu3chCAJMTU3RuXNnANk766mqqgIAMjIy0LdvX0gkEhw6dIgf9DDGcuGgFGOM/QI/mt0UFxeHPn36QEtLC25ubmJS8+fPn8PExAS+vr5o3759URe3xLK1tUVAQIA442z69OlwcHDA/fv3YWxsjAoVKiAxMRF6enpIT0/HxYsXoaSkxIGRfJAdqDx48ABfvnyBgYEBlJSUULp0aTx8+BCDBw+Gnp4eHBwc0LFjx1znMcbYz5JKpdi5cyfWr1+PChUqYNu2bahYsSLu3bsHZ2dnPHr0SC4wdf36dRw7dgy2trbcvhcCOzs7+Pn5oW/fvnj48CGysrLQv39/2NraAsgOTIWEhGD79u34+PEjrl27BiUlJW7zGWO5cFCKMcYK0cePH1GpUiXxZ19fXzx58gRlypRBly5d0K5dO+zZsweurq5QUFDAokWLkJWVBRcXF3z69AkXL17kzloByOZ/OnnyJMzNzeHv7w+JRIJ79+7BxsYGVlZWcHV1RXx8PEJDQxEXF4caNWqgR48enOw2n2Trff78+QgODsanT5+gp6eHMWPGYMSIEdDS0sKDBw8wdOhQVKtWDdOnT4exsXExl5wx9jvKaXOkUimCgoKwYcOG7wamHj9+jDFjxmDChAly5/ODh4Lx8fHB33//jZCQEDRv3hxBQUEwMzODgYEBhgwZgvnz5+Pz58/YvHkzHj16BE9PT3GXPb6/Msa+xUEpxhgrJMOHD8eXL1+wadMm6OnpwdHREWvXroWxsTEiIiKgrq6OXr16YcWKFQgLC8P69etx9OhR1KtXD5UqVcLRo0d5lk4h8ff3x9WrV6Guro6lS5eKrx84cAD9+/fHpk2bMHbs2Fzncd0XzLJly+Dh4QFfX1/06NED/fr1w61bt2BmZgYrKytoa2vj4cOH
6NixI8zMzODi4lLcRWaM/Ua+t/FEVlYWgoKC4OHhIReYioqKgouLC86dOwdnZ2f069evmEpdsmRlZcHJyQlEhLlz52Lv3r0YO3Ys7O3tce/ePZw4cQIzZ87ErFmzkDPMFASB76+MsR/ioBRjjBWS8+fPw8TEBP369YONjQ1sbW2xYsUKtG3bFklJSXB1dcXevXvRv39/zJ8/H0D2MidVVVVUqVKFk5oXkufPn2PixIm4dOkSxo0bh3Xr1iErKwtEBEVFRUydOhUPHjzAwYMHUapUKe4kF5KoqChMmjQJtra26NOnD44dO4ZBgwahVatW4jIaS0tLaGtr49WrV9DV1eW6Z4z9NNllXy9evECpUqUgCAK0tbWRmZmJoKAgeHp6ygWmbt26hUOHDsHe3p7bm3ySDQTm/D82Nhbp6enIyMiAqakpxo8fj5kzZ+LmzZvo2rUrypYtiwULFmD8+PG5PoMxxr7Fa0QYY6wQZGVloX379jh9+jT27NmD+fPnQyKR4M8//wQAlC1bFlOmTIGRkRFCQ0Px+fNnAEDdunWhq6sLiUQCqVTKAal8+PbZSs2aNTFr1iy0a9cO/v7+uHbtGhQUFMTBjLq6OqRSKVRUVHiQUgBSqVTuZz09PVhbW6NLly44d+4czM3NsXr1ahw7dgz6+vrw9/fH8uXL8fHjR1SrVg0KCgrIysoqptIzxn4nsgGpJUuWYOjQoWjTpg0sLCxw4MABKCoqYvjw4ZgyZQq+fv2KMWPGIDY2Fo0bN8a8efO4vckHIoJUKpULJuW0+5qamtDV1cWtW7cAAMOGDQMAfP36FZ07d4adnZ3cbGQOSDHG/hcOSjHGWCHI6fC2bNkS586dw5UrV3D69Gk8ePBAPEZDQwPjxo3DlStXEBERAUC+o8a5pPJOtsOckZGBpKQkAECPHj3g6OgIQ0NDWFhY4Nq1awCA5ORkXLhwAZqamsVW5pJAdoB46dIlvHr1CuXLl0evXr1QtmxZbN26FQMHDsS4ceMAADVq1ICKigqkUikqVqwofg4HBRljPyOnvVmwYAHWr1+P+fPnw8/PD8rKyjAzM8OuXbvkAlOPHj3CqlWrAPz3wQW3Nz/v5cuXEARBrPfVq1djxIgR6NatG7Zt24Znz54BAEqVKoWMjAyEhYUhJiYGzs7O0NXVhaWlJSQSCQcCGWM/hUdAjDFWALKzRXI6vC1btsTp06dRpkwZrF69Go8fPxaPKVWqFOrUqYNSpUoVeVlLGtnAiLOzM/r06YPOnTtj4sSJePHiBdq3bw9HR0dUrlwZ7dq1Q7NmzWBpaYn4+HgEBAQAyD3Liv1/RCTW+9y5czF27FhcvHgRiYmJKFu2LADg8+fPSEpKQmZmJgAgPj4eLi4uWL9+PQRB4HpnjP0U2bbi7NmzCA0Nxd69e9G3b1+kpaXh1KlTaNasGcaNG4fdu3dDUVERQ4cOhZubmxiU4lk6eePk5ISaNWvi9u3bALI3r/j777+hrq6OKlWqwMbGBsuXL8edO3fQunVrNGvWDEuWLEHz5s3x7t07uLi4iO08BwIZYz+Dc0oxxlg+yeZICA8Px6dPn9CmTRtoamqibNmyuHz5MoyMjNC2bVuMGDEC1atXh5ubG168eIGbN29yZ62QzJs3Dz4+PrCysoKSkhK8vb2hra0NFxcXtG/fHmfPnoWzszOioqKwdOlSjBw5EkD2zColJaViLv3va+XKlVi7di127tyJpk2bokKFCuJ7c+bMQVhYGOrUqYPo6GjEx8fjzp07UFBQ4O3AGWM/RbatSExMREpKClxdXbFs2TIcO3YM5ubmWLJkCbp3747+/fvjyZMn8PDwwOjRo8XP4OTaeff27VtMmzYNZ86cwdGjRxEQEID+/fujU6dOAIC9e/diyZIlaNWqFby8vPDu3Ts8f/4cnz9/hqmpKe9iyxjLMw5KMcZYAdnZ2cHX1xdA9hK9iRMn4q+//oKmpiauXLkCY2NjJCYm4q+//oKysjI2bNjA
u+wVAiLCo0eP0Lt3b7i6uqJXr14AsnNaGBsbQ0FBASdOnECZMmVw9OhReHt74/Xr1/D19UXDhg058Wo+ERESEhLQq1cvjBgxApaWluJ7ste0o6Mj3r59CwUFBXE7cL7mGWM/QzYg5eLigidPnsDBwQGVK1dG6dKlMXToUNSsWRMrV66EIAgYOnQo7t27Bz09PYSFhQHgGVIFERMTA0tLSxw9ehQaGhoICgpCu3btxPdDQkIwatQonD17Fq1atZI7l9t5xlhe8aNKxhjLo5xYPhHh6dOnuHjxIg4fPozHjx/DxMQEO3bswIYNG/Dhwwe0atUKZ8+eBQDUr18fPj4+UFJSQmZmJnfa8kF2uaQgCChdujQSExNRuXJlAEBaWhrKly+Pw4cP4/79+9i4cSMAwMTEBFOnToWenh4GDBiA27dv84AlnwRBQHJyMh49eoTq1asDgJg3REFBAcnJyYiNjcXSpUvh6+uLjRs3QlFRka95xthPywlI2dnZYdWqVejYsSOkUilKly6N+Ph43LhxA+rq6hAEAQkJCQCyZ2+GhYVBEARu3wtIW1sb7u7uGDlyJN6+fYt3794ByJ5hDACDBw9G9erVceXKlVzncjvPGMsrnlfJGGN5IPv0NiEhAcrKyqhTpw4aN24MZWVluLm5wdbWFgcPHoQgCJgyZQqaNGmC27dvQ19fH0B2MIunteedbC6jsWPHomLFili0aBEyMjIQHh6Oli1biklX1dTUYGBgIA5WAKBr165IT0/Htm3boKqqWly/Romgra0NbW1thISEoFevXmKifwUFBdy+fRtnzpzBxIkToa6uLp7D1zxjLC9OnDiB4OBg7N27V26WTvny5WFiYgIfHx+kpqbi1KlTSE1NhampKQRB4CXC+fC9OtPV1cWKFSvw+fNnTJgwAdWqVRNnRX3+/BmZmZkoX758cRSXMVbCcA+RMcbyIKfTtnDhQuzduxdfvnxBpUqV5GbwODk5wc7ODqGhoYiPj4ejoyMaNGgAAJxnIZ9kl9pFRkbi4sWLcHV1RdmyZTFv3jysWbMGFStWxOTJk6GkpASpVIqkpCSoqKgA+G+Hu2fPnujUqZP4Osu7nLocO3Ys/Pz8sGTJEixYsAAKCgrIyMjA4sWLUapUKdja2hZ3URljv7FXr15BRUUFBgYG4ms59wJzc3OoqKjg8OHDqF69OgIDAzlnXT7J1llAQAAePnyI9PR0dOrUCaampvDz88OYMWNgbGwMa2traGlpITw8HOXKlYOZmVkxl54xVhJwTinGGPsJskGR4OBgTJ48GStXrsTp06dx/vx5GBsbY9WqVXLb3U+aNAkZGRnYvHkzLyUoJL6+vjh16hQqVqwIV1dXANkDFw8PD/j4+KBPnz7Q09PD5cuX8f79e9y8eVMMAnIOqcL1/v17rF27Fvv27UPFihVRs2ZNPHnyBElJSbhx4waUlJS4zhljeZbTbnh4eMDd3R2XLl2CmpoaiEicMbtv3z7UqFEDDRs2hEQigSAI/NCngGxtbbFt2zYMHDgQb968wd27dzFixAgsW7YM7969g52dHbZv347BgwejZ8+eMDMz4/yYjLFCwY8SGGPsJ+QMrENCQnD//n2sXbsWEyZMwPbt22FlZYX79+9j7ty5iIuLE8/x9vYWA1Ic/88f2fxd0dHROH78OEJDQxETEyMeU61aNcycORPe3t548uQJbt++jRo1aiAyMlJMrg1w0tvCRETQ0tKCvb093N3dUatWLZQqVQrdunVDZGSkmDeN65wxllc57YaRkREeP34sPoAQBAESiQQJCQnw8/PDuXPnoKCgIN5jOSCVN7IzvENDQ7Fr1y7s378fGzZswPDhw/Hu3Tsx7YCOjg5Wr16NHj16ICEhARYWFpwfkzFWaHimFGOM/aQ7d+7AzMwMT58+haenJ8zNzQFkd+xcXFywb98+NG7cGEuXLpWbMcWzRQrPtWvX4OnpicDAQPj5+WHEiBFy739b1/zk/Nf5X9c1
PzlnjBUGHx8fWFlZYcqUKejduzeUlZWxYsUKxMTEICIigtv3fPDw8MCgQYOgpaUlttVeXl7YvXs3jh07hpCQEIwdOxZOTk6YPHkyEhMTce/ePbRq1QqfP3+GmpoaL5FkjBUqblEYY+wHvo3Z6+vrw8bGBjVq1MCGDRvw9etXANl5pmbNmoWBAwfi2LFj8PX1lTuPA1IF4+7ujubNmwMAWrZsiWnTpmHkyJFYsmQJQkJCxOMyMzPlzuMn53kXFRUl/n/jxo14+PDhD4/Nua5zltQA2QFaqVTKASnGWKGYMGECgoODsW/fPlhYWGDq1KkAgOvXr8vNhGU/5+DBg3B1dYWjoyM+fvwottWKioqoVq0awsLCYGFhIQakACA8PBz79+9HXFwcNDQ0IJFI5GZZMcZYQfFMKcYY+45vk6VmZGSIU9WDg4Ph4uICPT09uZ3cpFIpgoKCMGzYMB6UFxIiQlhYGMaMGYPGjRsjPDwcQPaAxMvLC5cuXcKSJUswaNCgYi7p7y8iIgLjx4/HX3/9hdevX8PV1RWPHz9G7dq1/+d5sjOmOMkwY+xX+PjxI+Lj4yGVSlG7dm1IJBKeCZsPUqkUbm5uCA4Ohr6+PlauXAlNTU1cu3ZN3FnPz88Po0ePBgCkpKRgwIABqF69Ory8vPghG2Psl+CgFGOMfUN2YL1hwwZcvXoVHz9+RM+ePTF27FiUKVMGO3bswLp166CjowN/f38xMJWDly/lz/eCGllZWThz5gzMzc2hr6+PEydOAMgOTPn4+GD37t0ICQmBkZFRcRT5t/fmzRvo6enhw4cPWLlyJXbs2IHk5GRcuHABDRo0+J8DP9mAlK+vL+7du4fVq1fzwIUx9ktxADzv0tPToaysDCB7l+DDhw9DX18fy5YtQ6VKleDn54dJkyZhwYIF4v10yZIleP/+vTgzjdMRMMZ+BW7NGWPsGzkdXTs7OyxatAilSpWClpYWZs6cifHjx+P58+cYPnw4rKysEBsbC1NTUyQnJ8t9Bgek8ien7g8dOiS+pqCggE6dOsHf3x8PHjyAsbExAKBFixawsLDAnDlz0LFjx2Ip7+9uxowZmDdvHrKyslC5cmXUq1cPiYmJqFatGo4dOwYAP1wiIzs48fb2xvTp09GpUycesDDGfjkOSOUNEYkBKU9PTzx69AjPnj2Dn58f5s2bh48fP8LCwgLu7u5wd3fHkCFDMG3aNCgqKuLatWvifYDbd8bYr8AzpRhj7DuuXr2KQYMGISgoCO3atQMAXLx4EUOGDIGJiQl8fX2Rnp4OX19fREZGwtPTkzvJheTu3bto1qwZhg0bhm3btomvZ2Rk4NChQxg0aBCGDBmCnTt3yp3Hs9Py7ty5c2jTpg0UFRXx9etXpKam4tWrV9ixYwfOnz+P/v37w8HB4X9+hre3N+zs7LB582ZeRskYY/9gK1aswKpVq+Dn5wdNTU0EBgbi0qVLaNmyJf7++29UqlQJr169QmJiIsqUKYMaNWpAEAReKskY+6V4BMUYY0CupJ0529lXqVIFRITMzEy0bdsWgYGB8Pf3R3h4OJSVlTFhwgR4e3tz4s8C+Lbe6tSpg02bNuHy5cswMzMTX1dSUoKhoSHq1KmD4OBgWFlZyZ3HAamfl/M8qkOHDlBUVMT27dvRtm1bxMXFoUWLFpgxYwZatWqFffv2wcnJSTxv6dKlePz4sfizj48PbG1tOSDFGGP/YESEhIQEHD16FHPnzsXAgQPRoUMHeHp6Yvjw4Th27JiY/LxatWqoX78+atasCUEQIJVKOSDFGPulOCjFGGP471KAyZMnIzAwEBoaGoiOjsajR4/kkji3bNkSf/zxB968eQNAPhDCM6XyTjYvyJYtW2BnZ4f58+cDABwdHXHt2jUx4SoAlClTBm3btsW5c+ewbt26YilzSfDtEowyZcpAS0sLEyZMwIMHD6Cnpwc7Ozu0bt0awcHBGDlyJHr16oUNGzagVq1aAIDNmzfDysoKfn5+
HJBijLF/MEEQoKqqCiUlJbx9+1buPXt7exgYGCAoKAiTJ09GXFyc3Pvct2GM/WrcyjDG/tVkVzCfPn0awcHBqFixIvT19TFu3DhYWVnhwoULUFRUhEQiQVZWFiQSCcqUKVOMpS45cjq7tra2sLe3R0ZGBp4/f45Vq1bh3LlzcHR0RFhYGIyNjbFp0yYMHjwYb9++RZs2baCgoMDbgefD92b0DRw4EDNnzkTp0qUxbtw4PHjwALq6urC3t8fQoUORnJwMVVVVvH79GgoKCvj48SPu3r2L4OBgDBw4sBh+C8YYYz/yvXZeKpWiZs2auHDhAl68eCH3XqNGjVC3bl3UqlULFSpUKKJSMsZYNs4pxRhjALZt24bIyEhoamqKOXQiIyPh7OyMY8eOwd7eHioqKjhw4ADevXuHGzdu8HKxQhIWFgZLS0sEBQXB0NAQwcHBMDMzQ0BAAIYMGYKzZ8/Czs4OmZmZqFSpEg4cOAAlJSXefSkfZOssPDwc6enpEAQBvXr1El9zdnZGcnIyNm/eDH19fXHHppzE5jm5uxISEnLtOskYY6x4ybbz9+7dg5KSEogIdevWxdevX9GkSRNUq1YNGzZsQM2aNaGkpIThw4eje/fumDhxorhkj++vjLGiwguEGWP/ek+fPoWvry+uXr0KGxsb8fWmTZti4cKFqFOnDtzc3KCnp4cqVarg+vXr4iwdDkwV3Lt371C1alUYGhoiJCQE48aNg6urK4YMGYLU1FSoqKjg4sWLiIuLg7q6OiddzYdRo0aJuaIAwMbGBlu3boWGhgbevn2LLl26YPny5ejevTuICGvWrMHEiRPh6ekJAwMDANnLP4hIvOY5IMUYY/8sRCQGk+bOnYuQkBAkJSUhMzMT48ePx/Lly3Hu3Dl06dIFQ4YMgZKSEiQSCZKSkrBz504OSDHGigXPlGKM/evIbmWfIzQ0FGvWrMHdu3dx/PhxNGzYUO79+Ph4lClTBkpKShwUKWQ5ieNHjRqFoUOHwtnZGZMnTwYA7N27FxcuXIC9vT0qVaoE4PvfH/uxuLg4LFy4EP7+/nB2dka/fv1gZGQEf39/6OjoID4+HgMGDICmpiZ8fX3xxx9/4NChQ1i8eDGaN28OLy+v4v4VGGOM5cHq1auxcuVKBAcHQxAEPH/+HJMnT4a5uTk2bdqE1NRUBAQEIDo6GoqKipgzZw4UFRX5YRtjrFhwUIox9q+SkZEBJSUlANmBJqlUCnV1dQDAsWPH4OTkhK9fv8LX1xcGBgZiDingv8mhOShSuB48eIDGjRsjIyMDvr6+GDNmDAAgJSUFAwYMgJ6eHjZu3Mh1XgDR0dHw8PCAm5sb+vfvDwDw8/MDkJ3X6/3792jZsiU6d+4Mf39/AMClS5fQqlUrfmLOGGP/cLL9EqlUikGDBsHAwADLli0Tjzl16hS6du2KdevWwdraOtdncECKMVZcuKfJGPtX2LFjBwCIAanFixeja9eu6NatGxYuXAgAMDY2xqxZs1CxYkWMHz8eUVFRYgdNNiDCwZHCpa+vj+3bt6N06dK4f/8+Tp8+jVOnTqFfv36Ijo6Gl5eXuHSM5U1OnVWpUgVTpkyBjY0NDh48iMePH0MikUAikSA1NRVaWlpwcXFBeHi4mAC3TZs2kEgk302Yyxhj7J9BKpWK/ZKPHz9CIpHg0aNHSE9PB5B9H8jIyICRkRGmT5+Offv2ITk5GZmZmXKfwwEpxlhx4aAUY6zEO3HiBEaNGoUFCxYAANavXw8PDw8MHz4cXbp0gZOTEywsLAAAPXr0wLRp01CxYkX06dMHz58/5yBUERgwYAA2b96M7du3w8zMDHPmzEHp0qVx/fp1cUkBfw95IztQAQAdHR2MHz8eVlZWuHz5Mtzd3QEApUuXBpA9Y0pNTQ0qKipyn8MzpRhj7J9JNv/TmjVrsGDBArx9+xajRo1CSEgIrl+/DkEQxHQD5cqVg0QigYqKCqcgYIz9Y3BrxBgr8dq2bYtNmzZh
ypQpUFRUhI6ODry9vcVlTF27dsWwYcNARNiyZQt69OiB1NRUnD9/HtWqVSvewv9LKCgoYMSIEejWrRu+fPmCUqVKoWrVqpy/K59kByoPHz7Ep0+fULduXejo6GDu3LlIT0/HjBkzkJaWhoEDB0JBQQEbN25ElSpVxNxdjDHG/tly2nk7Ozv4+flh3bp1yMrKQo8ePXD58mU4Ojpi6dKlaNGiBZKSknD16lXo6ekVc6kZY0we55RijJVosnkWPDw8MGfOHGRlZcHf3x9Dhw4Vjzt69CiGDh2KQYMGwdfXV+4zOM9C8eFdgPIm55aec83PmzcPe/fuRVxcHPT09NCiRQssXrwYioqKWL16NVavXo0yZcrAwsIC9+/fx6FDh6CkpMT1zhhjv4kTJ05gwoQJ2LZtG9q1aye+fuDAAWzevBknTpxAvXr1kJaWBiLCjRs3oKSkxPkxGWP/GNzjZIyVWKdOncL27dsBAJaWloiIiICHhweUlZVx/vx5uWNNTEwQHByMLVu2yCUGBTjPQnHiwEjeyA4wXFxcsGnTJqxfvx7R0dHQ19dHSEgInjx5Ag0NDUybNg2Ojo5ISEhAs2bNcPToUSgpKSEzM5PrnTHGfhOvXr2CiooKDAwMAEDMA9i3b1+sXbsWISEh6Nu3LywtLREZGSm28xyQYoz9U/B6CMZYiUNESExMxN9//4309HTs3LkT586dw8WLF1GvXj1kZWVh0qRJqFChApYuXSqe1717d1y+fBnNmjUrxtIzlnfz58+HlpYWrK2tIQgCEhMTcfr0aSxatAhdunTBkSNHsH//fqxevRpt27ZFeno6KlWqhHHjxkFbWxsjR44EkP23w0slGWPsny9nplNKSgqysrLE1wVBEGd4R0REoFmzZujRo4f4flZWFrfzjLF/FH4UyhgrcQRBgKqqKoKCghATE4PQ0FDY29ujfv36EAQBI0eOhJeXF1auXCkmP89haGgIRUXFXLvSMPZP9eXLF1y4cAEhISHw8/MDkJ3MNjExEe3atUN4eDiGDh0KZ2dnTJw4Eenp6di6dSsuXboEHR0dTJgwQbzm+ck5Y4z9HnLaayMjIzx+/Biurq7i6woKCkhMTERAQADCwsLkzuPZ34yxfxoOkzPGSiyJRILatWtDS0sLJ0+ehJ6eHszMzFC6dGmMHDkSgiBg6tSpiI+Px7p16+TO5aeI7HdARFBTU8POnTsxdepUBAQEICsrC+PHj4eamhqGDh2KmJgYrFu3DmPHjgUAxMbGYseOHRg1ahQ6dOggfhZf84wx9vupV68ePDw8YGVlhbi4OPTu3RvKyspYsWIFYmJiMGnSpOIuImOM/U+c6JwxVuLFxMRg3LhxSElJwbhx4zBq1CgAQEZGBlxdXXH48GGcPHmSZ4mw345sEv5Lly7BwcEBycnJcHBwQN26dWFhYYGUlBTcvn0baWlpSElJwciRI5GYmIhTp07xE3PGGCsBiAgHDhzAtGnTkJWVBTU1Nejq6oqbV/CGLYyxfzIOSjHG/hWeP38Oa2trpKenY8SIEfjrr79gYmKCJk2awNnZGYIg8E407Lc1a9YsPH36FNHR0bh//z50dHRgY2MDNTU1zJkzByoqKqhUqRIAICUlBVeuXOGBCmOMlTAfP35EfHw8pFIpateuDYlEgszMTJ4Jyxj7R+OgFGPsX+P58+eYPXs27t+/j9TUVJQtWxYRERFQVlbmgBT7bfn7+8PGxgbHjx9H9erVkZaWhtGjRyMjIwOjR4+GsbExtm3bhoyMDOjq6mLMmDFQUFDggQpjjJVwUqmUd1NljP3jcVCKMfavEh0djYiICLx//x6jR48WEzzz4Jz9rhYuXIgTJ07g7NmzEAQBgiDgzZs3GDhwIOLi4rBy5UoMGjRI7hyeIcUYY4wxxv4JeBTGGPtXqVKlCnr37i3+zFsjs99Vzuy+MmXKIC0tDWlpaShTpgwyMjKgp6eHv//+G/369cPChQuhqKiIfv36iedwQIox
xhhjjP0T8HxOxti/Gg/O2e8qZ7lpnz59cPPmTTg5OQEAlJSUAABpaWno2rUr+vXrhz59+sidwxhjjDHG2D8BTw9gjDHGfmMGBgbYuHEjJk6ciMTERAwdOhQaGhrYsGEDGjVqhOXLlwPg3CKMMcYYY+yfh3NKMcYYYyXA7t27YWlpCWVlZQCApqamuMseJ/JnjDHGGGP/RByUYowxxkqId+/e4e3bt0hKSkKHDh14lz3GGGOMMfaPxkEpxhhjrITiXfYYY4wxxtg/GQelGGOMMcYYY4wxxliR44ynjDHGGGOMMcYYY6zIcVCKMcYYY4wxxhhjjBU5DkoxxhhjjDHGGGOMsSLHQSnGGGOMMcYYY4wxVuQ4KMUYY4wxxhhjjDHGihwHpRhjjDHGGGOMMcZYkeOgFGOMMcYYY4wxxhgrchyUYowxxhhjjDHGGGNFjoNSjDHGGGOMMcYYY6zIcVCKMcYYY4wxxhhjjBU5DkoxxhhjjDHGGGOMsSL3H47NfrDVneNUAAAAAElFTkSuQmCC",
      "text/plain": [
       "\u001b[1m<\u001b[0m\u001b[1;95mFigure\u001b[0m\u001b[39m size 120\u001b[0m\u001b[1;36m0x600\u001b[0m\u001b[39m with \u001b[0m\u001b[1;36m1\u001b[0m\u001b[39m Axes\u001b[0m\u001b[1m>\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ves.pop(\"overall\")\n",
    "\n",
    "plt.figure(figsize=(12, 6))\n",
    "plt.bar(ves.keys(), ves.values(), color='skyblue')\n",
    "plt.xticks(rotation=45, ha='right')\n",
    "plt.ylabel('Values')\n",
    "plt.title('Values of Different Categories Excluding Overall')\n",
    "plt.grid(axis='y', linestyle='--', alpha=0.6)\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d9f46324-4fd1-40d4-824e-7252b830f2fd",
   "metadata": {},
   "source": [
    "### Writing your executor\n",
    "\n",
    "As mentioned earlier, premsql.evaluator can be used as a plug-and-play tool for evaluating models and pipelines on any kind of database. Here is an example of how to write your custom executor for PostgreSQL.\n",
    "\n",
    "You simply need to inherit the BaseExecutor class and define the `execute_sql` function. You can also override some additional functions, but those are out of the scope of this example. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bf84c62a-10d7-470b-937d-4cf15588f1c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import psycopg2\n",
    "import time\n",
    "from premsql.evaluator.base import BaseExecutor\n",
    "\n",
    "class PostgreSQLExecutor(BaseExecutor):\n",
    "    def execute_sql(self, sql: str, dsn_or_db_path: str) -> dict:\n",
    "        conn = psycopg2.connect(dsn_or_db_path)\n",
    "        cursor = conn.cursor()\n",
    "\n",
    "        start_time = time.time()\n",
    "        try:\n",
    "            cursor.execute(sql)\n",
    "            result = cursor.fetchall()\n",
    "            error = None\n",
    "        except Exception as e:\n",
    "            result = None\n",
    "            error = str(e)\n",
    "\n",
    "        end_time = time.time()\n",
    "        cursor.close()\n",
    "        conn.close()\n",
    "\n",
    "        result = {\n",
    "            \"result\": result,\n",
    "            \"error\": error,\n",
    "            \"execution_time\": end_time - start_time,\n",
    "        }\n",
    "        return result"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4985577f-b8a3-4068-8584-98ef36b62db4",
   "metadata": {},
   "source": [
    "That's it — customization is that easy. You can extend this to any kind of connector, as long as it outputs the dict shown above. "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e87d89f1-e039-48c5-83d9-db8e303ecc67",
   "metadata": {},
   "source": [
    "### Future Improvements:\n",
    "\n",
    "Currently, evaluations are not parallelizable. In our next iterations, we are going to optimize this, and we are also going to introduce more metrics (for example, the F1 score) along with visualizations for them. "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}


================================================
FILE: examples/finetuning.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/root/anindya/Submission/text2sql/text2sql\n"
     ]
    }
   ],
   "source": [
    "# cd .."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Finetuner\n",
    "\n",
    "premsql fine-tuner is the module that fine-tunes models for text-to-SQL tasks. We support the following ways of fine-tuning:\n",
    "\n",
    "1. Full fine-tuning \n",
    "2. PEFT using LoRA\n",
    "3. PEFT using QLoRA\n",
    "\n",
    "You can even make your own custom fine-tuning pipeline using our components and the set of tools that premsql provides. This tutorial expects you to know the following topics. \n",
    "\n",
    "1. [premsql datasets](/examples/datasets.ipynb)\n",
    "2. [premsql generators](/examples/generators.ipynb)\n",
    "3. [premsql evaluators](/examples/evaluation.ipynb)\n",
    "4. [premsql error handling datasets](/examples/error_dataset.ipynb)\n",
    "\n",
    "Additionally, it would be great if you have some idea of how the Hugging Face [TRL](https://huggingface.co/docs/trl/en/index) library works. We start by importing some packages. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/envs/deep/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-09-07 10:41:00,820] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/envs/deep/compiler_compat/ld: cannot find -laio: No such file or directory\n",
      "collect2: error: ld returned 1 exit status\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[93m [WARNING] \u001b[0m async_io requires the dev libaio .so object and headers but these were not found.\n",
      "\u001b[93m [WARNING] \u001b[0m async_io: please install the libaio-dev package with apt\n",
      "\u001b[93m [WARNING] \u001b[0m If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found.\n",
      "\u001b[93m [WARNING] \u001b[0m Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH\n",
      "\u001b[93m [WARNING] \u001b[0m sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.4\n",
      "\u001b[93m [WARNING] \u001b[0m using untested triton version (3.0.0), only 1.0.0 is known to be compatible\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/envs/deep/lib/python3.10/site-packages/deepspeed/runtime/zero/linear.py:49: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.\n",
      "  def forward(ctx, input, weight, bias=None):\n",
      "/root/miniconda3/envs/deep/lib/python3.10/site-packages/deepspeed/runtime/zero/linear.py:67: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.\n",
      "  def backward(ctx, grad_output):\n"
     ]
    }
   ],
   "source": [
    "from premsql.datasets import (\n",
    "    BirdDataset,\n",
    "    SpiderUnifiedDataset,\n",
    "    DomainsDataset,\n",
    "    GretelAIDataset\n",
    ")\n",
    "\n",
    "from premsql.evaluator.from_sqlite import SQLiteExecutor\n",
    "from premsql.datasets import Text2SQLDataset\n",
    "from premsql.tuner.peft import Text2SQLPeftTuner\n",
    "from premsql.datasets.error_dataset import ErrorDatasetGenerator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = \"/root/anindya/text2sql/data\"\n",
    "model_name_or_path = \"premai-io/prem-1B-SQL\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Defining different datasets\n",
    "\n",
    "First, we need some training datasets. In this tutorial, we are using small subsets (only for demo purposes; during actual fine-tuning you should use the full dataset) of various datasets that premsql provides. We start off by importing the BirdBench training dataset. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-09-07 10:41:16,255 - [BIRD-DATASET] - INFO - Loaded Bird Dataset\n",
      "2024-09-07 10:41:16,257 - [BIRD-DATASET] - INFO - Setting up Bird Dataset\n",
      "Applying prompt: 100%|██████████| 100/100 [00:00<00:00, 3519.80it/s]\n",
      "2024-09-07 10:41:16,891 - [DATASET] - INFO - Casted dataset with model chat template\n",
      "2024-09-07 10:41:16,892 - [DATASET] - INFO - Starting Tokenization ...\n",
      "Tokenizing: 100%|██████████| 100/100 [00:00<00:00, 188.71it/s]\n",
      "Tokenizing: 100%|██████████| 100/100 [00:00<00:00, 199.78it/s]\n"
     ]
    }
   ],
   "source": [
    "bird_train = BirdDataset(split=\"train\", dataset_folder=path).setup_dataset(\n",
    "    num_rows=100,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
     "Next, we load the Spider dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-09-07 10:41:44,000 - [SPIDER-DATASET] - INFO - Loaded Spider Dataset\n",
      "2024-09-07 10:41:44,005 - [SPIDER-DATASET] - INFO - Setting up Spider Dataset\n",
      "Applying prompt: 100%|██████████| 100/100 [00:00<00:00, 4144.69it/s]\n",
      "2024-09-07 10:41:44,636 - [DATASET] - INFO - Casted dataset with model chat template\n",
      "2024-09-07 10:41:44,637 - [DATASET] - INFO - Starting Tokenization ...\n",
      "Tokenizing: 100%|██████████| 100/100 [00:00<00:00, 399.31it/s]\n",
      "Tokenizing: 100%|██████████| 100/100 [00:00<00:00, 436.83it/s]\n"
     ]
    }
   ],
   "source": [
    "spider_train = SpiderUnifiedDataset(split=\"train\", dataset_folder=\"./data\").setup_dataset(\n",
    "    num_rows=100\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We load the domains dataset here. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-09-07 10:42:00,249 - [DOMAINS-DATASET] - INFO - Loaded Domains Dataset\n",
      "2024-09-07 10:42:00,252 - [DOMAINS-DATASET] - INFO - Setting up Domains Dataset\n",
      "Applying prompt: 100%|██████████| 100/100 [00:00<00:00, 2671.91it/s]\n",
      "2024-09-07 10:42:00,681 - [DATASET] - INFO - Casted dataset with model chat template\n",
      "2024-09-07 10:42:00,682 - [DATASET] - INFO - Starting Tokenization ...\n",
      "Tokenizing: 100%|██████████| 100/100 [00:00<00:00, 226.39it/s]\n",
      "Tokenizing: 100%|██████████| 100/100 [00:00<00:00, 241.73it/s]\n"
     ]
    }
   ],
   "source": [
    "domains_dataset = DomainsDataset(split=\"train\", dataset_folder=\"./data\").setup_dataset(\n",
    "    num_rows=100,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We also load the Gretel AI synthetic Text to SQL dataset. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Applying prompt: 100%|██████████| 100/100 [00:00<00:00, 162130.03it/s]\n",
      "2024-09-07 10:42:14,958 - [DATASET] - INFO - Casted dataset with model chat template\n",
      "2024-09-07 10:42:14,958 - [DATASET] - INFO - Starting Tokenization ...\n",
      "Tokenizing: 100%|██████████| 100/100 [00:00<00:00, 517.27it/s]\n",
      "Tokenizing: 100%|██████████| 100/100 [00:00<00:00, 579.19it/s]\n"
     ]
    }
   ],
   "source": [
    "gertelai_dataset = GretelAIDataset(split=\"train\", dataset_folder=\"./data\",).setup_dataset(\n",
    "    num_rows=100,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
     "Last but not least, we also load an error dataset. You can learn more about the error handling dataset [here](/examples/error_dataset.ipynb). "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-09-07 10:42:28,011 - [DATASET] - INFO - Casted dataset with model chat template\n",
      "2024-09-07 10:42:28,012 - [DATASET] - INFO - Starting Tokenization ...\n",
      "Tokenizing: 100%|██████████| 10/10 [00:00<00:00, 160.95it/s]\n",
      "Tokenizing: 100%|██████████| 10/10 [00:00<00:00, 180.55it/s]\n"
     ]
    }
   ],
   "source": [
    "existing_error_dataset = ErrorDatasetGenerator.from_existing(\n",
    "    experiment_name=\"testing_error_gen\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### NOTE:\n",
    "\n",
     "Since this tutorial is about fine-tuning using PEFT (with LoRA), this workflow internally uses TRL. So the datasets we are instantiating do not need to be tokenized, since TRL will be tokenizing under the hood. \n",
    "\n",
    "\n",
    "Now let's Merge all the datasets. We can pack different datasets into one single entity just like this. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "merged_dataset = [\n",
    "    *spider_train,\n",
    "    *bird_train,\n",
    "    *domains_dataset,\n",
    "    *gertelai_dataset,\n",
    "    *existing_error_dataset\n",
    "    \n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Additionally we also initialize the BirdBench validation dataset so that we can use it during the time of validation. \n",
    "\n",
     "Text-to-SQL validation methods are different from the validation processes of normal LLM fine-tuning tasks. Here we execute the generated SQL on the database and check whether it matches the ground truth tables. So premsql offers a custom and robust [huggingface callback](/premsql/tuner/callback.py) that helps you evaluate at each evaluation step of model training, which is the same evaluation method we use with evaluators. \n",
    "\n",
     "So in this case, all you need to do is define your validation datasets and that's it; our callback will take care of the rest. If you are unfamiliar with the syntax below, you should check out the [datasets](/examples/datasets.ipynb) and [evaluator](/examples/evaluation.ipynb) sections. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-09-07 10:43:00,302 - [BIRD-DATASET] - INFO - Loaded Bird Dataset\n",
      "2024-09-07 10:43:00,303 - [BIRD-DATASET] - INFO - Setting up Bird Dataset\n",
      "Applying prompt: 100%|██████████| 10/10 [00:00<00:00, 1762.53it/s]\n"
     ]
    }
   ],
   "source": [
    "bird_dev = Text2SQLDataset(dataset_name=\"bird\", split=\"validation\", dataset_folder=path).setup_dataset(\n",
    "    num_rows=10,\n",
    "    filter_by=(\"difficulty\", \"challenging\")\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now that we have set up everything, we need to initialize our tuner class. To initialize our tuner, we need to provide a `model_name_or_path` which will load the model (which is to be fine-tuned) and also provide an `experiment_name` which will save all the logs. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-09-07 10:43:05,347 - [LORA-FINETUNE] - WARNING - Setting up Pretrained-Model: premai-io/prem-1B-SQL\n",
      "Unrecognized keys in `rope_scaling` for 'rope_type'='linear': {'type'}\n",
      "Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.82s/it]\n"
     ]
    }
   ],
   "source": [
    "tuner = Text2SQLPeftTuner(\n",
    "    model_name_or_path=model_name_or_path,\n",
    "    experiment_name=\"lora_tuning\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally we call the train functions to provide the following things:\n",
    "\n",
    "1. train_datasets: The merged datasets which will be used for training\n",
    "2. output_dir: the output directory in which model weights will be stored\n",
    "3. num_train_epochs: Number of epochs\n",
    "4. per_device_train_batch_size: The train batch size per device \n",
    "5. gradient_accumulation_steps: Number of gradient accumulation steps\n",
    "6. evaluation_dataset: The evaluation dataset. It can also be None, and in that case it will not do evaluation steps during fine-tuning.\n",
    "7. eval_steps: After how many steps we need to start evaluation. \n",
    "8. max_seq_length: Maximum permissible sequence length of the model. \n",
    "9. executor: Only provide an [executor](/examples/evaluation.ipynb) when you have defined a evaluation_dataset. \n",
    "10. filter_eval_results_by: Make sure the filter key and filter value is present inside the dataset. This will filter the results out. In our case we are filtering by difficulty to only evaluate on challenging data points.\n",
    "\n",
    "Additionally you can provide your additional parameters (which should be compatible with [transformers TrainingArguments](https://huggingface.co/docs/transformers/v4.44.2/en/main_classes/trainer#transformers.TrainingArguments)) in form of **kwargs and it will override any other default settings. Now let's use this information to train the model. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tuner.train(\n",
    "    train_datasets=merged_dataset,\n",
    "    output_dir=\"./output\",\n",
    "    num_train_epochs=1,\n",
    "    per_device_train_batch_size=1,\n",
    "    gradient_accumulation_steps=1,\n",
    "    evaluation_dataset=bird_dev,\n",
    "    eval_steps=100,\n",
    "    max_seq_length=1024,\n",
    "    executor=SQLiteExecutor(),\n",
    "    filter_eval_results_by=(\"difficulty\", \"challenging\")\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This will start training the model. And you will see all the model outputs being stored inside `./output` and all the model fine-tuning logs being stored inside `./experiments/train/` directory. You can checkout our [fine-tuning using LoRA script](/examples/lora_tuning.py) for an end to end example."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "deep",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: examples/generators.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/root/anindya/Submission/text2sql/text2sql\n"
     ]
    }
   ],
   "source": [
    "# cd .."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generators\n",
    "\n",
     "premsql generators are responsible for producing SQL from the user's natural language question. You can think of this as the inference API specific to text-to-SQL. Generators are very modular in nature; you can plug in any kind of third-party API, model, or pipeline (more on this below). \n",
    "\n",
     "This tutorial is going to cover how to use the huggingface and premai providers to run local models and hosted models for free. Lastly, we are also going to show how you can write your own generators. Let's start by importing all the various packages. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/envs/deep/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-09-09 12:33:27,045] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/envs/deep/compiler_compat/ld: cannot find -laio: No such file or directory\n",
      "collect2: error: ld returned 1 exit status\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[93m [WARNING] \u001b[0m async_io requires the dev libaio .so object and headers but these were not found.\n",
      "\u001b[93m [WARNING] \u001b[0m async_io: please install the libaio-dev package with apt\n",
      "\u001b[93m [WARNING] \u001b[0m If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found.\n",
      "\u001b[93m [WARNING] \u001b[0m Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH\n",
      "\u001b[93m [WARNING] \u001b[0m sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.4\n",
      "\u001b[93m [WARNING] \u001b[0m using untested triton version (3.0.0), only 1.0.0 is known to be compatible\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/envs/deep/lib/python3.10/site-packages/deepspeed/runtime/zero/linear.py:49: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.\n",
      "  def forward(ctx, input, weight, bias=None):\n",
      "/root/miniconda3/envs/deep/lib/python3.10/site-packages/deepspeed/runtime/zero/linear.py:67: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.\n",
      "  def backward(ctx, grad_output):\n"
     ]
    }
   ],
   "source": [
    "from premsql.generators import Text2SQLGeneratorHF\n",
    "from premsql.datasets import Text2SQLDataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How Generators work\n",
    "\n",
    "premsql generators provide two types of generation strategies. One is a simple generation strategy where we simply generate the SQL from the prompt (which contains the schema of the tables, user questions, few shot examples etc). \n",
    "\n",
     "Another strategy, which sometimes gives a bump in performance, is execution-guided decoding. Simply put, the model generates a SQL query and executes it against the DB. If it gets an error, it uses that error in a self-correction prompt and generates again, until the max number of trials runs out. \n",
    "\n",
    "We will be showing both the examples below. Let's start with simple generation. We will be using BirdBench dev dataset for this example. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-09-09 12:34:11,944 - [BIRD-DATASET] - INFO - Loaded Bird Dataset\n",
      "2024-09-09 12:34:11,946 - [BIRD-DATASET] - INFO - Setting up Bird Dataset\n",
      "Applying prompt: 100%|██████████| 10/10 [00:00<00:00, 3060.42it/s]\n"
     ]
    }
   ],
   "source": [
    "bird_dataset = Text2SQLDataset(\n",
    "    dataset_name='bird', split=\"train\", force_download=False,\n",
    "    dataset_folder=\"/root/anindya/text2sql/data\"\n",
    ").setup_dataset(num_rows=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The input of the generator is not just prompt but a `data_blob` which should contain the following information:\n",
    "\n",
    "- `prompt`: The prompt which needs to be passed\n",
    "- `db_path`: The db path \n",
    "\n",
     "If you have these two pieces of information, you can use the generators for your own inference with your own data. Make sure the prompt contains the full schema of the tables belonging to the DB. Now let's define our generators. We will be using [Prem-1B-SQL](https://huggingface.co/premai-io/prem-1B-SQL) for this experiment. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-09-09 12:33:37,338 - [GENERATOR] - INFO - Experiment folder found in: experiments/test/test_generators\n",
      "Unrecognized keys in `rope_scaling` for 'rope_type'='linear': {'type'}\n",
      "Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.05s/it]\n"
     ]
    }
   ],
   "source": [
    "generator = Text2SQLGeneratorHF(\n",
    "    model_or_name_or_path=\"premai-io/prem-1B-SQL\",\n",
    "    experiment_name=\"test_generators\",\n",
    "    device=\"cuda:0\",\n",
    "    type=\"test\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
     "`Text2SQLGeneratorHF` internally uses HuggingFace transformers. You instantiate the class with an `experiment_name`. A folder `./experiments/` is created in your current directory (you can also change that directory by assigning the path to the `experiment_folder` argument). \n",
    "\n",
     "These folders are created to store the generation and evaluation results, so that you do not need to generate results every time. They are cached inside the experiment directory. Now let's generate results using a single datapoint. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/envs/deep/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:567: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.1` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`. This was detected when initializing the generation config instance, which means the corresponding file may hold incorrect parameterization and should be fixed.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SELECT movie_title FROM movies WHERE movie_release_year = 1945 ORDER BY movie_popularity DESC LIMIT 1;\n"
     ]
    }
   ],
   "source": [
    "sample = bird_dataset[0]\n",
    "\n",
    "response = generator.generate(\n",
    "    data_blob={\n",
    "        \"prompt\": sample[\"prompt\"],\n",
    "    },\n",
    "    temperature=0.1,\n",
    "    max_new_tokens=256\n",
    ")\n",
    "\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
     "The `generate` method is used just for a single response. This does not save anything. Now let's try to generate for multiple questions and save the results. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Generating result ...:   0%|          | 0/10 [00:00\n",
       "\n",
       "\n",
       "  \n",
       "    \n",
       "      \n",
       "      \n",
       "    \n",
       "  \n",
       "  \n",
       "    \n",
       "      \n",
       "      \n",
       "    \n",
       "    \n",
       "      \n",
       "      \n",
       "    \n",
       "    \n",
       "      \n",
       "      \n",
       "    \n",
       "    \n",
       "      \n",
       "      \n",
       "    \n",
       "    \n",
       "      \n",
       "      \n",
       "    \n",
       "    \n",
       "      \n",
       "      \n",
       "    \n",
       "    \n",
       "      \n",
       "      \n",
       "    \n",
       "    \n",
       "      \n",
       "      \n",
       "    \n",
       "    \n",
       "      \n",
       "      \n",
       "    \n",
       "    \n",
       "      \n",
       "      \n",
       "    \n",
       "    \n",
       "      \n",
       "      \n",
       "    \n",
       "  \n",
       "
Phone
0None
1(209) 229-4700
2(209) 253-1208
3(209) 365-4060
4(209) 368-4934
......
716(951) 672-2400
717(951) 678-5217
718(951) 824-1358
719(951) 926-6776
720(970) 258-0518
\n", "

721 rows × 1 columns

\n", "" ], "text/plain": [ " Phone\n", "0 None\n", "1 (209) 229-4700\n", "2 (209) 253-1208\n", "3 (209) 365-4060\n", "4 (209) 368-4934\n", ".. ...\n", "716 (951) 672-2400\n", "717 (951) 678-5217\n", "718 (951) 824-1358\n", "719 (951) 926-6776\n", "720 (970) 258-0518\n", "\n", "[721 rows x 1 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "response = agent.query(\n", " question=\"please list the phone numbers of the direct charter-funded schools that are opened after 2000/1/1\",\n", ")\n", "\n", "response[\"table\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here is the raw response. " ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'table': Phone\n", "0 None\n", "1 (209) 229-4700\n", "2 (209) 253-1208\n", "3 (209) 365-4060\n", "4 (209) 368-4934\n", ".. ...\n", "716 (951) 672-2400\n", "717 (951) 678-5217\n", "718 (951) 824-1358\n", "719 (951) 926-6776\n", "720 (970) 258-0518\n", "\n", "[721 rows x 1 columns], 'error': None, 'sql': \"SELECT Phone FROM schools WHERE Charter = 1 AND OpenDate > '2000-01-01' AND FundingType = 'Directly funded' GROUP BY Phone\"}\n" ] } ], "source": [ "print(response)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Inside the pipeline, we are using [execution guided decoding](/examples/generators.ipynb) which executes the SQL to the DB and checks if there is an error and does several retries till it gets a correct SQL (max retries is set to 5) \n", "\n", "Here is an another example:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/root/miniconda3/envs/deep/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:567: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.1` -- this flag is only used in sample-based generation modes. 
You should set `do_sample=True` or unset `temperature`. This was detected when initializing the generation config instance, which means the corresponding file may hold incorrect parameterization and should be fixed.\n", " warnings.warn(\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
School
0Millikan High
1Polytechnic High
2Troy High
\n", "
" ], "text/plain": [ " School\n", "0 Millikan High\n", "1 Polytechnic High\n", "2 Troy High" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "agent.query(\n", " question=\"Among the schools with the SAT test takers of over 500, please list the schools that are magnet schools or offer a magnet program.\",\n", " additional_knowledge=\"Magnet schools or offer a magnet program means that Magnet = 1\"\n", ")[\"table\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "An another example:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/root/miniconda3/envs/deep/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:567: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.1` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`. This was detected when initializing the generation config instance, which means the corresponding file may hold incorrect parameterization and should be fixed.\n", " warnings.warn(\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CDSCode
001100170109835
101100170112607
201100170118489
301100170123968
401100170124172
......
998158727516056832
998258727516056840
998358727516118806
998458727690123570
998558727695838305
\n", "

9986 rows × 1 columns

\n", "
" ], "text/plain": [ " CDSCode\n", "0 01100170109835\n", "1 01100170112607\n", "2 01100170118489\n", "3 01100170123968\n", "4 01100170124172\n", "... ...\n", "9981 58727516056832\n", "9982 58727516056840\n", "9983 58727516118806\n", "9984 58727690123570\n", "9985 58727695838305\n", "\n", "[9986 rows x 1 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "agent.query(\"list all the distinct CDSCode\")['table']" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/root/miniconda3/envs/deep/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:567: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.1` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`. This was detected when initializing the generation config instance, which means the corresponding file may hold incorrect parameterization and should be fixed.\n", " warnings.warn(\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
District
0ABC Unified
1Acalanes Union High
2Ackerman Charter
3Acton-Agua Dulce Unified
4Adelanto Elementary
......
1406Yreka Union Elementary
1407Yreka Union High
1408Yuba City Unified
1409Yuba County Office of Education
1410Yucaipa-Calimesa Joint Unified
\n", "

1411 rows × 1 columns

\n", "
" ], "text/plain": [ " District\n", "0 ABC Unified\n", "1 Acalanes Union High\n", "2 Ackerman Charter\n", "3 Acton-Agua Dulce Unified\n", "4 Adelanto Elementary\n", "... ...\n", "1406 Yreka Union Elementary\n", "1407 Yreka Union High\n", "1408 Yuba City Unified\n", "1409 Yuba County Office of Education\n", "1410 Yucaipa-Calimesa Joint Unified\n", "\n", "[1411 rows x 1 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "agent.query(\"what are the unique districts in schools and sorted\")['table']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Sometimes, the model hallucinates, since it a very small model. And in those cases we get an error like this. However it will still return a dataframe such that the pipeline does not break in terms of response consistency. " ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/root/miniconda3/envs/deep/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:567: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.1` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`. 
This was detected when initializing the generation config instance, which means the corresponding file may hold incorrect parameterization and should be fixed.\n", " warnings.warn(\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2024-09-06 20:05:02,871 - [SIMPLE-AGENT] - INFO - => Going for final correction ...\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "{'table': error\n", "0 Error: (sqlite3.OperationalError) no such colu..., 'error': 'Error: (sqlite3.OperationalError) no such column: High_Grade\\n[SQL: SELECT max(High_Grade) FROM frpm;]\\n(Background on this error at: https://sqlalche.me/e/20/e3q8)', 'sql': 'SELECT max(High_Grade) FROM frpm;'}\n" ] } ], "source": [ "response = agent.query(\"what is the max high grade\")\n", "print(response)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We are using a very small model above and sometimes it fails to generate correct response. So in those cases, we also have a `correct_with_gpt` method (which runs internally) that corrects any furthur SQL responses so that we can maximize the chances of getting error free SQLs. \n", "\n", "In order to use this, you need to have a [premai-io](https://premai.io) account. You can get started [here](https://docs.premai.io) to get start a new project and get a project_id and API key. \n", "\n", "The final auto-correct with gpt only triggers when you provide `premai_api_key` and `premai_project_id` parameters while instantiating the pipeline. 
Here how it looks like: " ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-09-06 20:05:04,760 - [GENERATOR] - INFO - Experiment folder found in: experiments/test/test_nli\n", "Unrecognized keys in `rope_scaling` for 'rope_type'='linear': {'type'}\n", "Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00, 2.91s/it]\n", "2024-09-06 20:05:11,380 - [SIMPLE-AGENT] - INFO - Everything set\n", "2024-09-06 20:05:11,380 - [SIMPLE-AGENT] - INFO - Using gpt-4o as the final corrector\n" ] } ], "source": [ "premai_api_key=\"Fqxxxxx-xxxxxx-xxxxx-xxxx\" # Replace this\n", "premai_project_id=1234 # Replace this \n", "\n", "agent_with_corrector = SimpleText2SQLAgent(\n", " dsn_or_db_path=db,\n", " generator=Text2SQLGeneratorHF(\n", " model_or_name_or_path=\"premai-io/prem-1B-SQL\",\n", " experiment_name=\"test_nli\",\n", " device=\"cuda:0\",\n", " type=\"test\"\n", " ),\n", " premai_api_key=premai_api_key,\n", " premai_project_id=premai_project_id\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And now asking the same question, we get the correct answer. You can also see a info being logged which tells it is using GPT for final correction." ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/root/miniconda3/envs/deep/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:567: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.1` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`. 
This was detected when initializing the generation config instance, which means the corresponding file may hold incorrect parameterization and should be fixed.\n", " warnings.warn(\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2024-09-06 20:05:14,629 - [SIMPLE-AGENT] - INFO - => Going for final correction ...\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MAX(`High Grade`)
0Post Secondary
\n", "
" ], "text/plain": [ " MAX(`High Grade`)\n", "0 Post Secondary" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "agent_with_corrector.query(\"what is the max high grade\")[\"table\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Future Plans\n", "\n", "Currently local LLMs for text to SQL still do not have very good autonomous capabilities. So still there becomes a dependency of closed source models to some extent. However in upcoming versions we are going to replace that with fully local autonomous and reliable text to SQL pipelines. " ] } ], "metadata": { "kernelspec": { "display_name": "deep", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: premsql/__init__.py ================================================ ================================================ FILE: premsql/agents/__init__.py ================================================ from premsql.agents.baseline.main import BaseLineAgent from premsql.agents.memory import AgentInteractionMemory __all__ = ["BaseLineAgent", "AgentInteractionMemory"] ================================================ FILE: premsql/agents/base.py ================================================ from abc import ABC, abstractmethod from typing import Optional, Union import pandas as pd from premsql.executors.base import BaseExecutor from premsql.executors.from_langchain import SQLDatabase from premsql.generators.base import Text2SQLGeneratorBase from premsql.logger import setup_console_logger from premsql.agents.memory import AgentInteractionMemory from premsql.agents.models import ( AgentOutput, AnalyserWorkerOutput, ChartPlotWorkerOutput, 
# Abstract interfaces that every agent worker must implement. Subclass one of
# these to plug a custom worker (analysis, plotting, routing) into an agent.


class WorkerBase(ABC):
    """Minimal contract for any worker: a single ``run`` entry point."""

    @abstractmethod
    def run(self):
        # BUG FIX: the original did `return NotImplementedError()`, which
        # *returns an exception instance* instead of raising it — a subclass
        # calling super().run() would silently receive an exception object.
        # Raise, matching the sibling ABCs below.
        raise NotImplementedError


class AnalysisWorkerBase(ABC):
    """Contract for workers that analyse a dataframe for a NL question."""

    @abstractmethod
    def run(
        self, question: str, input_dataframe: Optional[pd.DataFrame] = None
    ) -> AnalyserWorkerOutput:
        raise NotImplementedError


class ChartPlotWorkerBase(ABC):
    """Contract for workers that turn a dataframe into a chart/plot."""

    @abstractmethod
    def run(
        self, question: str, input_dataframe: Optional[pd.DataFrame] = None
    ) -> ChartPlotWorkerOutput:
        raise NotImplementedError


class RouterWorkerBase(ABC):
    """Contract for workers that route a question to a pipeline branch."""

    @abstractmethod
    def run(
        self, question: str, input_dataframe: Optional[pd.DataFrame] = None
    ) -> RouterWorkerOutput:
        raise NotImplementedError
class AgentBase(ABC):
    """Common plumbing for agents.

    Holds the session identity, the target database URI and the interaction
    history store, and defines the ``run``/``__call__`` contract that concrete
    agents implement.
    """

    def __init__(
        self,
        session_name: str,
        db_connection_uri: str,
        session_db_path: Optional[str] = None,
        route_worker_kwargs: Optional[dict] = None,
    ) -> None:
        self.session_name, self.db_connection_uri = session_name, db_connection_uri
        self.history = AgentInteractionMemory(
            session_name=session_name, db_path=session_db_path
        )
        self.session_db_path = self.history.db_path
        # BUG FIX: normalise None -> {}. __call__ both reads
        # (`.get("plot")`) and writes (`["plot"] = ...`) this mapping in
        # server mode, which raised AttributeError/TypeError when the
        # default None was kept.
        self.route_worker_kwargs = (
            route_worker_kwargs if route_worker_kwargs is not None else {}
        )

    @abstractmethod
    def run(
        self,
        question: str,
        input_dataframe: Optional[dict] = None,
        server_mode: Optional[bool] = False,
    ) -> Union[ExitWorkerOutput, AgentOutput]:
        # Make sure you convert the dataframe to a table
        raise NotImplementedError()

    def convert_exit_output_to_agent_output(
        self, exit_output: ExitWorkerOutput
    ) -> AgentOutput:
        """Flatten the per-worker fields of an ExitWorkerOutput into the
        single AgentOutput shape served to clients.

        For each coalesced field the first non-None candidate wins (SQL
        worker first, then analysis, then plot, then followup).
        """
        return AgentOutput(
            session_name=exit_output.session_name,
            question=exit_output.question,
            db_connection_uri=exit_output.db_connection_uri,
            route_taken=exit_output.route_taken,
            input_dataframe=exit_output.sql_input_dataframe
            or exit_output.analysis_input_dataframe
            or exit_output.plot_input_dataframe,
            output_dataframe=exit_output.sql_output_dataframe
            or exit_output.plot_output_dataframe,
            sql_string=exit_output.sql_string,
            analysis=exit_output.analysis,
            reasoning=exit_output.sql_reasoning
            or exit_output.analysis_reasoning
            or exit_output.plot_reasoning,
            plot_config=exit_output.plot_config,
            image_to_plot=exit_output.image_to_plot,
            followup_route=exit_output.followup_route_to_take,
            followup_suggestion=exit_output.followup_suggestion,
            error_from_pipeline=(
                exit_output.error_from_sql_worker
                or exit_output.error_from_analysis_worker
                or exit_output.error_from_plot_worker
                or exit_output.error_from_followup_worker
            ),
        )

    def __call__(
        self,
        question: str,
        input_dataframe: Optional[dict] = None,
        server_mode: Optional[bool] = False,
    ) -> Union[ExitWorkerOutput, AgentOutput]:
        """Run the agent, persist the interaction, and (in server mode)
        convert the exit output into the client-facing AgentOutput."""
        if server_mode:
            # Server mode never renders images locally: force plot_image=False
            # while preserving any caller-supplied plot kwargs.
            plot_kwargs = self.route_worker_kwargs.get("plot", None)
            plot_kwargs = (
                {"plot_image": False}
                if plot_kwargs is None
                else {**plot_kwargs, "plot_image": False}
            )
            self.route_worker_kwargs["plot"] = plot_kwargs
        output = self.run(question=question, input_dataframe=input_dataframe)
        # TODO: Watch out dict here type mismatch with run
        self.history.push(output=output)
        if server_mode:
            output = self.convert_exit_output_to_agent_output(exit_output=output)
        return output
class BaseLineAgent(AgentBase):
    """Baseline agent: routes a natural-language question to the text2sql /
    analyser / plotter workers and produces a followup suggestion whenever a
    worker fails (or when the router picks no worker at all).
    """

    def __init__(
        self,
        session_name: str,
        db_connection_uri: str,
        specialized_model1: Text2SQLGeneratorBase,
        specialized_model2: Text2SQLGeneratorBase,
        executor: BaseExecutor,
        plot_tool: BasePlotTool,
        session_db_path: Optional[str] = None,
        include_tables: Optional[list] = None,
        exclude_tables: Optional[list] = None,
        auto_filter_tables: Optional[bool] = False,
        route_worker_kwargs: Optional[dict] = None,
    ) -> None:
        # BUGFIX: the default used to be a mutable `{}` shared by every
        # instance. AgentBase.__call__ mutates route_worker_kwargs["plot"] in
        # server mode, so the shared default leaked state across agents.
        # `None` + `or {}` keeps the old semantics without the shared object.
        super().__init__(
            session_name=session_name,
            db_connection_uri=db_connection_uri,
            session_db_path=session_db_path,
            route_worker_kwargs=route_worker_kwargs or {},
        )
        # specialized_model1 drives SQL generation (and its own correction);
        # specialized_model2 drives analysis, plotting and followups.
        self.text2sql_worker = BaseLineText2SQLWorker(
            db_connection_uri=db_connection_uri,
            generator=specialized_model1,
            helper_model=specialized_model1,
            executor=executor,
            include_tables=include_tables,
            exclude_tables=exclude_tables,
            auto_filter_tables=auto_filter_tables,
        )
        self.analysis_worker = BaseLineAnalyserWorker(generator=specialized_model2)
        self.plotter_worker = BaseLinePlotWorker(
            generator=specialized_model2, plot_tool=plot_tool
        )
        self.followup_worker = BaseLineFollowupWorker(generator=specialized_model2)
        self.router = SimpleRouterWorker()

    def run(
        self, question: str, input_dataframe: Optional[pd.DataFrame] = None
    ) -> ExitWorkerOutput:
        """Route the question, execute the matching worker, and attach a
        followup suggestion if the worker reported an error."""
        decision = self.router.run(question=question, input_dataframe=input_dataframe)

        # Assumption carried over from the original TODO: any reusable output
        # table is expected to live inside the last 10 history entries.
        dataframe_from_history = None
        for entry in self.history.get(limit=10):
            df = entry["message"].show_output_dataframe()
            if df is not None and len(df) > 0:
                dataframe_from_history = df
                break

        # Router picked none of the concrete workers -> treat as a followup.
        if decision.route_to not in ("query", "analyse", "plot"):
            return self._handle_followup_route(question=question)

        worker_output = self._execute_worker(
            question=question,
            route_to=decision.route_to,
            input_dataframe=input_dataframe,
            dataframe_from_history=dataframe_from_history,
        )
        exit_output = self._create_exit_worker_output(
            question=question,
            route_taken=decision.route_to,
            worker_output=worker_output,
        )
        if any(
            [
                exit_output.error_from_analysis_worker,
                exit_output.error_from_plot_worker,
                exit_output.error_from_sql_worker,
            ]
        ):
            followup_output = self._handle_followup(exit_output)
            exit_output.followup_suggestion = followup_output.suggestion
            # "query" is the default route when no alternative is suggested.
            exit_output.followup_route_to_take = (
                followup_output.alternative_route or "query"
            )
            exit_output.error_from_followup_worker = (
                followup_output.error_from_model
            )
        return exit_output

    def _execute_worker(
        self,
        question: str,
        route_to: str,
        input_dataframe: Optional[pd.DataFrame],
        dataframe_from_history: Optional[pd.DataFrame],
    ):
        """Dispatch to the worker for `route_to` ('query'|'analyse'|'plot').

        An explicitly supplied input dataframe wins over one recovered from
        the conversation history.
        """
        # (typo fix: previously named `decision_mappign`)
        frame = dataframe_from_history if input_dataframe is None else input_dataframe
        dispatch = {
            "query": lambda: self.text2sql_worker.run(
                question=question,
                render_results_using="json",
                **self.route_worker_kwargs.get("query", {}),
            ),
            "analyse": lambda: self.analysis_worker.run(
                question=question,
                input_dataframe=frame,
                **self.route_worker_kwargs.get("analyse", {}),
            ),
            "plot": lambda: self.plotter_worker.run(
                question=question,
                input_dataframe=frame,
                **self.route_worker_kwargs.get("plot", {}),
            ),
        }
        return dispatch[route_to]()

    def _create_exit_worker_output(
        self,
        question: str,
        route_taken: str,
        worker_output: Any,  # TODO: change it Literal of worker fixed outputs
    ) -> ExitWorkerOutput:
        """Copy the route-specific worker fields into an ExitWorkerOutput."""
        exit_output = ExitWorkerOutput(
            session_name=self.session_name,
            question=question,
            route_taken=route_taken,
            db_connection_uri=self.db_connection_uri,
            additional_input=getattr(worker_output, "additional_input", None),
        )
        if route_taken == "query":
            exit_output.sql_string = worker_output.sql_string
            exit_output.sql_reasoning = worker_output.sql_reasoning
            exit_output.sql_output_dataframe = worker_output.output_dataframe
            exit_output.error_from_sql_worker = worker_output.error_from_model
        elif route_taken == "analyse":
            exit_output.analysis = worker_output.analysis
            exit_output.analysis_reasoning = worker_output.analysis_reasoning
            exit_output.analysis_input_dataframe = worker_output.input_dataframe
            exit_output.error_from_analysis_worker = worker_output.error_from_model
        elif route_taken == "plot":
            exit_output.plot_config = worker_output.plot_config
            exit_output.plot_input_dataframe = worker_output.input_dataframe
            exit_output.plot_output_dataframe = worker_output.output_dataframe
            exit_output.image_to_plot = worker_output.image_plot
            exit_output.plot_reasoning = worker_output.plot_reasoning
            exit_output.error_from_plot_worker = worker_output.error_from_model
        return exit_output

    def _handle_followup(self, prev_output: ExitWorkerOutput):
        """Ask the followup worker for an alternative after a worker error."""
        return self.followup_worker.run(
            prev_output=prev_output,
            db_schema=self.text2sql_worker.db.get_context()["table_info"],
            user_feedback=None,
        )

    def _handle_followup_route(self, question: str) -> ExitWorkerOutput:
        """Handle the case where the router chose no worker: with no history
        we can only nudge the user; otherwise ask the followup worker based
        on the most recent interaction."""
        history_entries = self.history.get()
        if len(history_entries) == 0:
            return ExitWorkerOutput(
                session_name=self.session_name,
                question=question,
                route_taken="followup",
                db_connection_uri=self.db_connection_uri,
                additional_input=None,
                followup_suggestion="Before Writing a followup please either query / analyse / plot",
                followup_route_to_take="query",
                error_from_followup_worker=None,
            )
        followup_output = self.followup_worker.run(
            prev_output=self.history.get(limit=1)[0]["message"],
            user_feedback=question,
            db_schema=self.text2sql_worker.db.get_context()["table_info"],
            **self.route_worker_kwargs.get("followup", {}),
        )
        return ExitWorkerOutput(
            session_name=self.session_name,
            question=question,
            route_taken="followup",
            db_connection_uri=self.db_connection_uri,
            additional_input=None,
            followup_suggestion=followup_output.suggestion,
            # query should always be the default route
            followup_route_to_take=followup_output.alternative_route or "query",
            error_from_followup_worker=followup_output.error_from_model,
        )
================================================ FILE: premsql/agents/baseline/prompts.py ================================================ # --------------------------------- table selection --------------------------------- # BASELINE_TEXT2SQL_TABLE_SELECTION_PROMPT = """ ### Instruction: Respond only with valid JSON. No introduction or summary needed. Do not add ``` at start / end of the output. You will be given a database schema and user query. Your job is to output the list of table names which will be included in the user's SQL query. Here are some examples: CREATE TABLE customers ( customer_id INT, customer_name VARCHAR(100), contact_info VARCHAR(100) ); CREATE TABLE orders ( order_id INT, customer_id INT, order_date DATE, total_amount FLOAT ); CREATE TABLE products ( product_id INT, product_name VARCHAR(100), price FLOAT ); User Query: "What are all the tables in database" Output: {{ "include": ["customers", "orders", "products"] }} Example: Schema: CREATE TABLE employees ( employee_id INT, employee_name VARCHAR(100), department VARCHAR(100), salary FLOAT ); CREATE TABLE departments ( department_id INT, department_name VARCHAR(100), location VARCHAR(100) ); CREATE TABLE projects ( project_id INT, project_name VARCHAR(100), budget FLOAT ); User Query: "List the names of employees and their salaries." Output: {{ "include": ["employees"] }} Example: Schema: CREATE TABLE students ( student_id INT, student_name VARCHAR(100), grade_level INT ); CREATE TABLE courses ( course_id INT, course_name VARCHAR(100), teacher_id INT ); CREATE TABLE enrollments ( student_id INT, course_id INT, enrollment_date DATE ); User Query: "Show the list of students enrolled in courses." 
Output: {{ "include": ["students", "enrollments"] }} Example: Schema: CREATE TABLE authors ( author_id INT, author_name VARCHAR(100) ); CREATE TABLE books ( book_id INT, book_title VARCHAR(100), author_id INT ); CREATE TABLE publishers ( publisher_id INT, publisher_name VARCHAR(100) ); CREATE TABLE book_sales ( sale_id INT, book_id INT, sale_amount FLOAT ); User Query: "Find the total sales for each book." Output: {{ "include": ["books", "book_sales"] }} ------ Assistant's turn ------ Like above examples, here is your DB schema: {schema} some additional info about the columns (optional): {additional_info} and the user question: {question} NOTE: The name of the tables should always match with the name of the tables present the above schema Respond only with valid JSON. Do not write an introduction or summary. output: """ # --------------------------------- text to sql --------------------------------- # BASELINE_TEXT2SQL_WORKER_PROMPT_NO_FEWSHOT = """ # Instruction: - You will be given a question and a database schema. - You need to write a SQL query to answer the question. Do not add ``` at start / end of the query. It should be a single line query in a single line (string format). - Make sure the column names are correct and exists in the table - For column names which has a space with it, make sure you have put `` in that column name # Database and Table Schema: {schemas} {additional_knowledge} # Question: {question} # SQL: """ BASELINE_TEXT2SQL_WORKER_PROMPT = """ # Instruction: - You will be given a question and a database schema. - You need to write a SQL query to answer the question. Do not add ``` at start / end of the query. It should be a single line query in a single line (string format). 
- Make sure the column names are correct and exists in the table - For column names which has a space with it, make sure you have put `` in that column name # Database and Table Schema: {schemas} {additional_knowledge} # Here are some Examples on how to generate SQL statements and use column names: {few_shot_examples} # Question: {question} # SQL: """ # --------------------------------- error handling --------------------------------- # BASELINE_TEXT2SQL_WORKER_ERROR_HANDLING_PROMPT = """ {existing_prompt} # Generated SQL: {sql} ## Error Message {error_msg} Carefully review the original question and error message, then rewrite the SQL query to address the identified issues. Ensure your corrected query uses correct column names, follows proper SQL syntax, and accurately answers the original question without introducing new errors. # SQL: """ # --------------------------------- analysis base --------------------------------- # BASELINE_ANALYSIS_WORKER_PROMPT = """ ### Instruction: You will receive a user question and a table. Do not add ``` at start / end of the output. Analyze the table and provide your analysis following the below structure # Analysis: (This should contain the answer from user's question) # Reasoning: (This should contain the reasoning behind the your answer or analysis) Example 1: table: | Country | Population | GDP | |---------|------------|-------| | A | 5M | 50B | | B | 10M | 100B | | C | 2M | 10B | User Question: Which country has the highest GDP per capita? Output: # Analysis: Country A has the highest GDP per capita. # Reasoning: Country A's GDP per capita is 10,000, higher than B (10,000) and C (5,000). 
Example 2: table: | Month | Product | Sales | Returns | Profit Margin (%) | |----------|---------|-------|---------|-------------------| | January | A | 1000 | 50 | 20 | | January | B | 1500 | 30 | 15 | | February | A | 1200 | 20 | 22 | | February | B | 1300 | 40 | 18 | | March | A | 1100 | 25 | 21 | | March | B | 1400 | 35 | 16 | User Question: Which product had the highest profit margin in February? Output: # Analysis: Product A had the highest profit margin in February. # Reasoning: Product A had a profit margin of 22% compared to Product B's 18% in February. ------ Assistant's turn ------ Dataframe to analyse: {dataframe} user question: {question} """ # --------------------------------- analysis merger --------------------------------- # BASELINE_ANALYSIS_MERGER_PROMPT = """Your task is to summarise You will be given a set of summaries and reasoning in the form of a list of json. Your task is to review the analuse and reasoning and summarise them: - analysis1 - analysis2 - analysis3 - analysis4 and so on.... You will see the analysis and summarise them in a good human readible format. ------ Assistant's turn ------ Here is your analysis: {analysis} Summary output: """ # --------------------------------- plot base --------------------------------- # BASELINE_CHART_WORKER_PROMPT_TEMPLATE = """ ### Instruction: Respond only with valid JSON. No introduction or summary needed. You are a senior data analyst. You know how to plot data when asked some analysis question. Do not add ``` at start / end of the output. 
You will be given a user question, a list of dataframe column names, and you will output a JSON with the following structure: {{ "x": # output the column name which should be on x-axis, "y": # output the column name which should be on y-axis, "plot_type": # The type of plot }} You can choose from these plot types: - area: if you think you need to plot an area chart, - bar: if you think you need to plot a bar chart, - scatter: if you think you need to plot a scatter plot, - histogram: if you think you need to plot a histogram, - line: if you think you need to plot a line chart, ### Examples: Example 1: User Question: "Show the relationship between sales and revenue." Dataframe columns: ["product_name", "sales", "revenue"] Output: {{ "x": "sales", "y": "revenue", "plot_type": "scatter" }} Example 2: User Question: "Show the distribution of product sales." Dataframe columns: ["product_name", "sales"] Output: {{ "x": "product_name", "y": "sales", "plot_type": "bar" }} Example 3: User Question: "Show the change in temperature over time." Dataframe columns: ["date", "temperature"] Output: {{ "x": "date", "y": "temperature", "plot_type": "line" }} Example 4: User Question: "Show the frequency distribution of employee ages." Dataframe columns: ["employee_name", "age"] Output: {{ "x": "age", "y": None, "plot_type": "histogram" }} ------ Assistant's turn ------ Like the above example, here is your dataframe columns: {columns} and here is user question: {question} NOTE: From user's question you need to find the column of the table that makes sense. Do not add any column name which is not present in {columns}. If there is nothing to add just put None Respond only with valid JSON. Do not write an introduction or summary. output: """ # --------------------------------- followup base --------------------------------- # BASELINE_FOLLOWUP_WORKER_PROMPT = """ ### Instruction: Respond only with valid JSON. No introduction or summary needed. 
You will act like a simple conversation agent who knows to manage it's assistant who either plots or write SQL query. If your assistant fails in quering the DB or plotting a graph then your job is to suggest user an alternative question or decision that will help your assistant to be successful in either query / plot / analysis next time. Do not add ``` at start / end of the output. You will be recieving the following input: 1. DB Schema: The schema of the database 2. Decision: The decision that your assistant took. Decision could be either 'query' or 'plot' or 'analyse' 3. Query: The question that user asked. 4. Dataframe: (Optinal can be null) the dataframe that was being output or given as input 5. Analysis: Some text analysis from the model 6. Error from assistant: The error that your assistant made. Your job is to output a JSON with the following structure: {{ "alternate_decision": # can be either query / plot, "suggestion": # the alternate suggestive question }} Here is an example: Example 1: DB Schema: CREATE TABLE sales ( product_id INT, product_name VARCHAR(100), sale_date DATE, revenue FLOAT ); Decision: "query" Query: "What is the average revenue for products sold after 2023?" Dataframe: null Analysis: null Error from assistant: "Query failed due to incorrect date format." Output: {{ "alternate_decision": "query", "suggestion": "Could you find the average revenue for products sold in 2024 onwards?" }} Example 2: DB Schema: CREATE TABLE employees ( employee_id INT, employee_name VARCHAR(100), department VARCHAR(100), salary FLOAT ); Decision: "plot" Query: "Show a line chart of the salary distribution by department." Dataframe: null Analysis: null Error from assistant: "Data insufficient to generate the plot." Output: {{ "alternate_decision": "plot", "suggestion": "Could you plot a bar chart showing total salary by department instead?" 
}} Example 1: DB Schema: CREATE TABLE sales ( product_id INT, product_name VARCHAR(100), sale_date DATE, revenue FLOAT ); Decision: "plot" Query: "Plot the monthly revenue trend for 2024." Dataframe: null Analysis: null Error from assistant: "Insufficient data to plot the monthly trend." Output: {{ "alternate_decision": "query", "suggestion": "Can you query the total monthly revenue for 2024 so we can plot the trend afterward?" }} Example: DB Schema: CREATE TABLE sales ( product_id INT, product_name VARCHAR(100), sale_date DATE, revenue FLOAT ); Decision: "analyse" Query: "Why xyz product name is doing unwell" Dataframe: null Analysis: "because they have less interaction, not good product" Error from assistant: "null" Output: {{ "alternate_decision": "analyse", "suggestion": "Why the product name xyz is not doing good and explain it in points" }} ------ Assistant's turn ------ DB Schema: {schema} Decision: {decision} Query: {question} Dataframe: {dataframe} Analysis: {analysis} Error from assistant: {error_from_model} Your JSON keys should be only: `alternate_decision` and `suggestion` Respond only with valid JSON. Do not write an introduction or summary. 
output: """ ================================================ FILE: premsql/agents/baseline/workers/__init__.py ================================================ from premsql.agents.baseline.workers.analyser import BaseLineAnalyserWorker from premsql.agents.baseline.workers.followup import BaseLineFollowupWorker from premsql.agents.baseline.workers.plotter import BaseLinePlotWorker from premsql.agents.baseline.workers.text2sql import BaseLineText2SQLWorker __all__ = [ "BaseLineText2SQLWorker", "BaseLineAnalyserWorker", "BaseLinePlotWorker", "BaseLineFollowupWorker", ] ================================================ FILE: premsql/agents/baseline/workers/analyser.py ================================================ from typing import Optional import pandas as pd from tqdm.auto import tqdm from premsql.generators.base import Text2SQLGeneratorBase from premsql.logger import setup_console_logger from premsql.agents.base import AnalyserWorkerOutput, AnalysisWorkerBase from premsql.agents.baseline.prompts import ( BASELINE_ANALYSIS_MERGER_PROMPT, BASELINE_ANALYSIS_WORKER_PROMPT, ) from premsql.agents.utils import convert_df_to_dict logger = setup_console_logger("[BASELINE-ANALYSER-WORKER]") CHUNK_TEMPLATE = """ # Analysis: {analysis} # Reasoning {reasoning} """ # TODO: Need to think of the case when there is no df being passed class BaseLineAnalyserWorker(AnalysisWorkerBase): def __init__(self, generator: Text2SQLGeneratorBase) -> None: self.generator = generator def run_chunkwise_analysis( self, question: str, input_dataframe: pd.DataFrame, chunk_size: Optional[int] = 20, max_chunks: Optional[int] = 20, temperature: Optional[float] = 0.19, max_new_tokens: Optional[int] = 600, analysis_prompt_template: Optional[str] = BASELINE_ANALYSIS_WORKER_PROMPT, merger_prompt_template: Optional[str] = BASELINE_ANALYSIS_MERGER_PROMPT, verbose: Optional[bool] = False, ) -> tuple[str, str]: num_chunks = (len(input_dataframe) + chunk_size - 1) // chunk_size chunks = [ input_dataframe[i * 
chunk_size : (i + 1) * chunk_size] for i in range(num_chunks) ][:max_chunks] analysis_list = [] num_errors = 0 for i, chunk in tqdm(enumerate(chunks), total=len(chunks)): analysis, error_from_model = self.analyse( question=question, input_dataframe=chunk, temperature=temperature, max_new_tokens=max_new_tokens, prompt_template=analysis_prompt_template, ) if error_from_model: num_errors += 1 logger.error(f"Error while analysing: {i}, Skipping ...") continue if verbose: logger.info( CHUNK_TEMPLATE.format( analysis=analysis["analysis"], reasoning=analysis["analysis_reasoning"], ) ) analysis_list.append(analysis) analysis_list_str = "\n".join( [ analysis["analysis"] + " " + analysis["analysis_reasoning"] for analysis in analysis_list ] ) if num_errors < len(chunks): summarized_analysis_prompt = merger_prompt_template.format( analysis=analysis_list_str ) summary = self.generator.generate( data_blob={"prompt": summarized_analysis_prompt}, temperature=temperature, max_new_tokens=max_new_tokens, postprocess=False, ) analysis = { "analysis": summary, "analysis_reasoning": "Analysis summarised by AI", } error_from_model = None else: analysis = { "analysis": "\n".join( [ content["analyse"] if "analyse" in content else "" for content in analysis_list ] ), "analysis_reasoning": "Appending all the analysis", } error_from_model = "Model not able to summarise analysis" return analysis, error_from_model def analyse( self, question: str, input_dataframe: pd.DataFrame, temperature: Optional[float] = 0.19, max_new_tokens: Optional[int] = 512, prompt_template: Optional[str] = BASELINE_ANALYSIS_WORKER_PROMPT, ) -> dict: output = self.generator.generate( data_blob={ "prompt": prompt_template.format( dataframe=str(input_dataframe), question=question ) }, temperature=temperature, max_new_tokens=max_new_tokens, postprocess=False, ) try: sections = output.split('# ') analysis_from_model, reasoning_from_model = '', '' for section in sections: if section.startswith('Analysis:'): 
analysis_from_model = section.strip() elif section.startswith('Reasoning:'): reasoning_from_model = section.strip() analysis = { "analysis": analysis_from_model, "analysis_reasoning": reasoning_from_model } error_from_model = None except Exception as e: analysis = { "analysis": output, "analysis_reasoning": "Not able to split analysis and reasoning", } error_from_model = str(e) logger.info(analysis) logger.info("------------") logger.info(error_from_model) return analysis, error_from_model def run( self, question: str, input_dataframe: pd.DataFrame, do_chunkwise_analysis: Optional[bool] = False, chunk_size: Optional[int] = 20, max_chunks: Optional[int] = 20, temperature: Optional[float] = 0.19, max_new_tokens: Optional[int] = 600, analysis_prompt_template: Optional[str] = BASELINE_ANALYSIS_WORKER_PROMPT, analysis_merger_template: Optional[str] = BASELINE_ANALYSIS_MERGER_PROMPT, verbose: Optional[bool] = False, ) -> AnalyserWorkerOutput: if len(input_dataframe) > chunk_size and do_chunkwise_analysis: logger.info("Going for chunk wise analysis ...") analysis, error_from_model = self.run_chunkwise_analysis( question=question, input_dataframe=input_dataframe, chunk_size=chunk_size, max_chunks=max_chunks, analysis_prompt_template=analysis_prompt_template, merger_prompt_template=analysis_merger_template, temperature=temperature, max_new_tokens=max_new_tokens, verbose=verbose, ) else: if len(input_dataframe) > chunk_size: logger.info( "Truncating table, you can also choose chunk wise analysis, but it takes more time." 
) analysis, error_from_model = self.analyse( question=question, input_dataframe=input_dataframe.iloc[:chunk_size, :], temperature=temperature, max_new_tokens=max_new_tokens, prompt_template=analysis_prompt_template, ) return AnalyserWorkerOutput( question=question, input_dataframe=convert_df_to_dict(df=input_dataframe), analysis=analysis.get("analysis", "Not able to analyse"), analysis_reasoning=analysis.get("analysis_reasoning", None), error_from_model=error_from_model, additional_input={ "temperature": temperature, "max_new_tokens": max_new_tokens, "chunkwise_analysis": do_chunkwise_analysis, "chunk_size": chunk_size, "max_chunks": max_chunks, }, ) ================================================ FILE: premsql/agents/baseline/workers/followup.py ================================================ from typing import Optional import pandas as pd from premsql.generators.base import Text2SQLGeneratorBase from premsql.logger import setup_console_logger from premsql.agents.base import WorkerBase from premsql.agents.baseline.prompts import BASELINE_FOLLOWUP_WORKER_PROMPT from premsql.agents.models import ExitWorkerOutput, FollowupWorkerOutput logger = setup_console_logger("[BASELINE-FOLLOWUP-WORKER]") class BaseLineFollowupWorker(WorkerBase): def __init__(self, generator: Text2SQLGeneratorBase) -> None: self.generator = generator def run( self, prev_output: ExitWorkerOutput, db_schema: str, user_feedback: Optional[str] = None, prompt_template: Optional[str] = BASELINE_FOLLOWUP_WORKER_PROMPT, temperature: Optional[float] = 0.18, max_new_tokens: Optional[int] = 128, ) -> FollowupWorkerOutput: if prev_output.route_taken == "query": error = "\n".join( filter(None, [prev_output.error_from_sql_worker, user_feedback]) ) dataframe = prev_output.sql_output_dataframe elif prev_output.route_taken == "plot": error = "\n".join( filter(None, [prev_output.error_from_plot_worker, user_feedback]) ) dataframe = prev_output.plot_input_dataframe elif prev_output.route_taken == "analyse": 
dataframe = prev_output.analysis_input_dataframe error = "\n".join( filter(None, [prev_output.error_from_analysis_worker, user_feedback]) ) else: error = user_feedback dataframe = next( ( df for df in [ prev_output.sql_output_dataframe, prev_output.plot_input_dataframe, prev_output.analysis_input_dataframe, ] if df is not None ), None, ) if dataframe: if isinstance(dataframe, dict) and "data" in dataframe and "columns" in dataframe: dataframe = pd.DataFrame(dataframe["data"], columns=dataframe["columns"]) elif not isinstance(dataframe, pd.DataFrame): try: dataframe = pd.DataFrame(dataframe) except: dataframe = None prompt = prompt_template.format( schema=db_schema, decision=prev_output.route_taken, question=prev_output.question, dataframe=dataframe, analysis=prev_output.analysis, error_from_model=error, ) try: result = self.generator.generate( data_blob={"prompt": prompt}, temperature=temperature, max_new_tokens=max_new_tokens, postprocess=False, ) result = eval(result.replace("null", "None")) error_from_model = None assert "alternate_decision" in result assert "suggestion" in result except Exception as e: result = { "alternate_decision": prev_output.route_taken, "suggestion": "Worker unable to generate alternative suggestion", } error_from_model = str(e) return FollowupWorkerOutput( question=user_feedback or prev_output.question, error_from_model=error_from_model, route_taken=result["alternate_decision"], suggestion=result["suggestion"], additional_input={ "temperature": temperature, "max_new_tokens": max_new_tokens, }, ) ================================================ FILE: premsql/agents/baseline/workers/plotter.py ================================================ from typing import Optional import pandas as pd from premsql.generators.base import Text2SQLGeneratorBase from premsql.logger import setup_console_logger from premsql.agents.base import ChartPlotWorkerBase, ChartPlotWorkerOutput from premsql.agents.baseline.prompts import 
BASELINE_CHART_WORKER_PROMPT_TEMPLATE from premsql.agents.tools.plot.base import BasePlotTool from premsql.agents.utils import convert_df_to_dict logger = setup_console_logger("[PLOT-WORKER]") class BaseLinePlotWorker(ChartPlotWorkerBase): def __init__( self, generator: Text2SQLGeneratorBase, plot_tool: BasePlotTool ) -> None: self.generator, self.plot_tool = generator, plot_tool def run( self, question: str, input_dataframe: pd.DataFrame, temperature: Optional[float] = 0.1, max_new_tokens: Optional[int] = 100, plot_image: Optional[bool] = True, prompt_template: Optional[str] = BASELINE_CHART_WORKER_PROMPT_TEMPLATE, **kwargs, ) -> ChartPlotWorkerOutput: prompt = prompt_template.format( columns=list(input_dataframe.columns), question=question ) try: logger.info("Going for generation") to_plot = self.generator.generate( data_blob={"prompt": prompt}, temperature=temperature, max_new_tokens=max_new_tokens, postprocess=False, ) to_plot = to_plot.replace("null", "None") plot_config = eval(to_plot) fig = self.plot_tool.run(data=input_dataframe, plot_config=plot_config) logger.info(f"Plot config: {plot_config}") if plot_image: output = self.plot_tool.convert_image_to_base64( self.plot_tool.convert_plot_to_image(fig=fig) ) logger.info("Done base64 conversion") else: output = None return ChartPlotWorkerOutput( question=question, input_dataframe=convert_df_to_dict(input_dataframe), plot_config=plot_config, plot_reasoning=None, output_dataframe=None, image_plot=output, error_from_model=None, additional_input={ "temperature": temperature, "max_new_tokens": max_new_tokens, **kwargs, }, ) except Exception as e: error_message = f"Error during plot generation: {str(e)}" return ChartPlotWorkerOutput( question=question, input_dataframe=convert_df_to_dict(input_dataframe), plot_config=None, image_plot=None, plot_reasoning=None, error_from_model=error_message, additional_input={ "temperature": temperature, "max_new_tokens": max_new_tokens, **kwargs, }, ) 
================================================ FILE: premsql/agents/baseline/workers/text2sql.py ================================================ from textwrap import dedent from typing import Literal, Optional from premsql.executors.base import BaseExecutor from premsql.generators.base import Text2SQLGeneratorBase from premsql.logger import setup_console_logger from premsql.agents.base import Text2SQLWorkerBase from premsql.agents.baseline.prompts import ( BASELINE_TEXT2SQL_TABLE_SELECTION_PROMPT, BASELINE_TEXT2SQL_WORKER_ERROR_HANDLING_PROMPT, BASELINE_TEXT2SQL_WORKER_PROMPT, BASELINE_TEXT2SQL_WORKER_PROMPT_NO_FEWSHOT, ) from premsql.agents.models import Text2SQLWorkerOutput from premsql.agents.utils import execute_and_render_result logger = setup_console_logger("[BASELINE-TEXT2SQL-WORKER]") class BaseLineText2SQLWorker(Text2SQLWorkerBase): def __init__( self, db_connection_uri: str, generator: Text2SQLGeneratorBase, helper_model: Optional[Text2SQLGeneratorBase] = None, executor: Optional[BaseExecutor] = None, include_tables: Optional[list] = None, exclude_tables: Optional[list] = None, auto_filter_tables: Optional[bool] = False, ): super().__init__( db_connection_uri=db_connection_uri, generator=generator, executor=executor, include_tables=include_tables, exclude_tables=exclude_tables, ) self.corrector = helper_model self.table_filer_worker = helper_model self.auto_filter_tables = auto_filter_tables @staticmethod def show_dataframe(output: Text2SQLWorkerOutput): import pandas as pd if output.output_dataframe: df = pd.DataFrame( output.output_dataframe["data"], columns=output.output_dataframe["columns"], ) return df return pd.DataFrame({}) def filer_tables_from_schema( self, question: str, additional_input: Optional[str] = None ) -> dict: prompt = BASELINE_TEXT2SQL_TABLE_SELECTION_PROMPT.format( schema=self.db.get_context()["table_info"], additional_info=additional_input, question=question, ) all_tables = self.db.get_usable_table_names() try: to_include = [] 
output = self.corrector.generate({"prompt": prompt}, postprocess=False) output = eval(output) for table in all_tables: if table in output["include"]: to_include.append(table) except Exception as e: logger.info(f"Error while selecting table: {e}") to_include = all_tables return to_include def _create_prompt( self, question: str, additional_knowledge: Optional[str] = None, fewshot_dict: Optional[dict] = None, prompt_template: Optional[str] = BASELINE_TEXT2SQL_WORKER_PROMPT, ) -> str: # Initialize database first self.db = self.initialize_database( db_connection_uri=self.db_connection_uri, include_tables=None ) # Get tables to include to_include = [] if self.auto_filter_tables: try: to_include = self.filer_tables_from_schema( question=question, additional_input=additional_knowledge ) logger.info(f"Selected tables in schema: {to_include}") except Exception as e: logger.warning(f"Error filtering tables: {e}. Using all tables.") # Get schema information schema_prompt = self.db.get_context()["table_info"] # Build knowledge prompt knowledge_prompt = "" if fewshot_dict: template = dedent(""" Question: {question} SQL: {sql} """) try: knowledge_prompt = "".join( template.format(question=sample_question, sql=sample_sql) for sample_question, sample_sql in fewshot_dict.items() ) except Exception as e: logger.warning(f"Error formatting fewshot examples: {e}") elif to_include and len(to_include) <= 10: try: self.db._sample_rows_in_table_info = 3 knowledge_prompt = str(self.db.get_table_info(table_names=to_include)) except Exception as e: logger.warning(f"Error getting table info: {e}") try: if fewshot_dict or (to_include and len(to_include) <= 10): prompt = prompt_template.format( schemas=schema_prompt if fewshot_dict else "", additional_knowledge=additional_knowledge or "", few_shot_examples=knowledge_prompt, question=question, ) else: prompt = BASELINE_TEXT2SQL_WORKER_PROMPT_NO_FEWSHOT.format( schemas=schema_prompt, additional_knowledge=additional_knowledge or "", 
question=question, ) return prompt except Exception as e: logger.error(f"Error formatting prompt: {e}") raise def run( self, question: str, additional_knowledge: Optional[str] = None, fewshot_dict: Optional[dict] = None, temperature: Optional[float] = 0.1, max_new_tokens: Optional[int] = 256, render_results_using: Optional[Literal["json", "dataframe"]] = "json", prompt_template: Optional[str] = BASELINE_TEXT2SQL_WORKER_PROMPT, error_handling_prompt_template: Optional[ str ] = BASELINE_TEXT2SQL_WORKER_ERROR_HANDLING_PROMPT, **kwargs, ) -> Text2SQLWorkerOutput: if question.startswith("`") and question.endswith("`"): result = execute_and_render_result( db=self.db, sql=question.replace('`', ''), using=render_results_using ) return Text2SQLWorkerOutput( db_connection_uri=self.db_connection_uri, sql_string=question.startswith("`"), sql_reasoning=None, input_dataframe=None, output_dataframe=result["dataframe"], # Truncating to question=question, error_from_model=result["error_from_model"], additional_input={ "additional_knowledge": additional_knowledge, "fewshot_dict": fewshot_dict, "temperature": temperature, "max_new_tokens": max_new_tokens, **kwargs, }, ) prompt = self._create_prompt( question=question, additional_knowledge=additional_knowledge, fewshot_dict=fewshot_dict, prompt_template=prompt_template, ) generated_sql = self.generator.execution_guided_decoding( data_blob={"prompt": prompt, "db_path": self.db_connection_uri}, executor=self.executor, temperature=temperature, max_new_tokens=max_new_tokens, max_retries=5, postprocess=True, **kwargs, ) result = execute_and_render_result( db=self.db, sql=generated_sql, using=render_results_using ) if result["error_from_model"] is not None: logger.info("=> Going for final correction ...") generated_sql = self.do_correction( question=question, result=result, additional_knowledge=additional_knowledge, fewshot_dict=fewshot_dict, prompt_template=prompt_template, error_handling_prompt_template=error_handling_prompt_template, 
**kwargs, ) result = execute_and_render_result( db=self.db, sql=generated_sql, using=render_results_using ) return Text2SQLWorkerOutput( db_connection_uri=self.db_connection_uri, sql_string=generated_sql, sql_reasoning=None, input_dataframe=None, output_dataframe=result["dataframe"], # Truncating to question=question, error_from_model=result["error_from_model"], additional_input={ "additional_knowledge": additional_knowledge, "fewshot_dict": fewshot_dict, "temperature": temperature, "max_new_tokens": max_new_tokens, **kwargs, }, ) def do_correction( self, question: str, result: dict, additional_knowledge: Optional[str] = None, fewshot_dict: Optional[dict] = None, prompt_template: Optional[str] = BASELINE_TEXT2SQL_WORKER_PROMPT, error_handling_prompt_template: Optional[ str ] = BASELINE_TEXT2SQL_WORKER_ERROR_HANDLING_PROMPT, **kwargs, ): if not self.corrector: logger.info("Corrector model not defined, no furthur correction possible.") error_prompt = error_handling_prompt_template.format( existing_prompt=self._create_prompt( question=question, additional_knowledge=additional_knowledge, fewshot_dict=fewshot_dict, prompt_template=prompt_template, ), error_msg=result["error_from_model"], sql=result["sql_string"], ) return self.generator.generate( data_blob={"prompt": error_prompt}, postprocess=True, **kwargs ) ================================================ FILE: premsql/agents/memory.py ================================================ import os import tempfile import sqlite3 from platformdirs import user_cache_dir from typing import List, Literal, Optional from premsql.logger import setup_console_logger from premsql.agents.models import ExitWorkerOutput from premsql.agents.utils import convert_exit_output_to_agent_output logger = setup_console_logger("[PIPELINE-MEMORY]") class AgentInteractionMemory: def __init__(self, session_name: str, db_path: Optional[str] = None): self.session_name = session_name self.db_path = db_path or os.path.join( os.getcwd(), "premsql", 
"premsql_pipeline_memory.db" ) logger.info(self.db_path) os.makedirs(os.path.dirname(self.db_path), exist_ok=True) self.conn = sqlite3.connect(self.db_path) self.create_table_if_not_exists() def list_sessions(self) -> List[str]: cursor = self.conn.cursor() cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") tables = cursor.fetchall() return [table[0] for table in tables if table[0] != "sqlite_sequence"] def create_table_if_not_exists(self): cursor = self.conn.cursor() cursor.execute( f""" CREATE TABLE IF NOT EXISTS {self.session_name} ( message_id INTEGER PRIMARY KEY AUTOINCREMENT, question TEXT, db_connection_uri TEXT, route_taken TEXT, sql_string TEXT, sql_reasoning TEXT, sql_input_dataframe TEXT, sql_output_dataframe TEXT, error_from_sql_worker TEXT, analysis TEXT, analysis_reasoning TEXT, analysis_input_dataframe TEXT, error_from_analysis_worker TEXT, plot_config TEXT, plot_input_dataframe TEXT, plot_output_dataframe TEXT, image_to_plot TEXT, plot_reasoning TEXT, error_from_plot_worker TEXT, followup_route_to_take TEXT, followup_suggestion TEXT, error_from_followup_worker TEXT, additional_input TEXT, timestamp DATETIME DEFAULT CURRENT_TIMESTAMP ) """ ) self.conn.commit() def get( self, limit: Optional[int] = None, order: Optional[Literal["DESC", "ASC"]] = "DESC", ) -> List[tuple[int, ExitWorkerOutput]]: cursor = self.conn.cursor() query = f"SELECT * FROM {self.session_name} ORDER BY message_id {order}" if limit is not None: query += " LIMIT ?" 
cursor.execute(query, (limit,)) else: cursor.execute(query) rows = cursor.fetchall() return [ {"message_id": row[0], "message": self._row_to_exit_worker_output(row)} for row in rows ] def get_latest_message_id(self) -> Optional[int]: cursor = self.conn.cursor() query = f"SELECT message_id FROM {self.session_name} ORDER BY message_id DESC LIMIT 1" cursor.execute(query) row = cursor.fetchone() return row[0] if row else None def generate_messages_from_session( self, session_name: str, limit: int = 100, server_mode: bool=False ): cursor = self.conn.cursor() query = f"SELECT * FROM {session_name} ORDER BY message_id ASC LIMIT {limit}" cursor.execute(query) rows = cursor.fetchall() for row in rows: yield self._row_to_exit_worker_output(row=row) if server_mode == False else convert_exit_output_to_agent_output( self._row_to_exit_worker_output(row=row) ) def get_by_message_id(self, message_id: int) -> Optional[dict]: cursor = self.conn.cursor() query = f"SELECT * FROM {self.session_name} WHERE message_id = ?" cursor.execute(query, (message_id,)) row = cursor.fetchone() if row is None: return None return self._row_to_exit_worker_output(row=row) def push(self, output: ExitWorkerOutput): cursor = self.conn.cursor() cursor.execute( f""" INSERT INTO {self.session_name} ( question, db_connection_uri, route_taken, sql_string, sql_reasoning, sql_input_dataframe, sql_output_dataframe, error_from_sql_worker, analysis, analysis_reasoning, analysis_input_dataframe, error_from_analysis_worker, plot_config, plot_input_dataframe, plot_output_dataframe, image_to_plot, plot_reasoning, error_from_plot_worker, followup_route_to_take, followup_suggestion, error_from_followup_worker, additional_input ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", self._exit_worker_output_to_tuple(output), ) self.conn.commit() logger.info("Pushed to the database") def delete_table(self): cursor = self.conn.cursor() try: cursor.execute(f"DROP TABLE IF EXISTS {self.session_name}") self.conn.commit() logger.info(f"Table '{self.session_name}' has been deleted.") except sqlite3.Error as e: logger.error(f"Error deleting table '{self.session_name}': {e}") finally: cursor.close() def _row_to_exit_worker_output(self, row) -> ExitWorkerOutput: try: return ExitWorkerOutput( session_name=self.session_name, question=row[1], db_connection_uri=row[2], route_taken=row[3], sql_string=row[4], sql_reasoning=row[5], sql_input_dataframe=self._parse_json(row[6]), sql_output_dataframe=self._parse_json(row[7]), error_from_sql_worker=row[8], analysis=row[9], analysis_reasoning=row[10], analysis_input_dataframe=self._parse_json(row[11]), error_from_analysis_worker=row[12], plot_config=self._parse_json(row[13]), plot_input_dataframe=self._parse_json(row[14]), plot_output_dataframe=self._parse_json(row[15]), image_to_plot=row[16], plot_reasoning=row[17], error_from_plot_worker=row[18], followup_route_to_take=row[19], followup_suggestion=row[20], error_from_followup_worker=row[21], additional_input=self._parse_json(row[22]), ) except Exception as e: logger.error(f"Error converting row to ExitWorkerOutput: {e}") return None def _exit_worker_output_to_tuple(self, output: ExitWorkerOutput) -> tuple: return ( output.question, output.db_connection_uri, output.route_taken, output.sql_string, output.sql_reasoning, self._serialize_json(output.sql_input_dataframe), self._serialize_json(output.sql_output_dataframe), output.error_from_sql_worker, output.analysis, output.analysis_reasoning, self._serialize_json(output.analysis_input_dataframe), output.error_from_analysis_worker, self._serialize_json(output.plot_config), self._serialize_json(output.plot_input_dataframe), self._serialize_json(output.plot_output_dataframe), output.image_to_plot, 
output.plot_reasoning, output.error_from_plot_worker, output.followup_route_to_take, output.followup_suggestion, output.error_from_followup_worker, self._serialize_json(output.additional_input), ) def _parse_json(self, json_str): import json if not json_str: return None try: return json.loads(json_str) except json.JSONDecodeError: logger.warning(f"Failed to parse JSON: {json_str}") return None def _serialize_json(self, obj): import json if obj is None: return None try: return json.dumps(obj) except TypeError: logger.warning(f"Failed to serialize object to JSON: {obj}") return None def clear(self): cursor = self.conn.cursor() cursor.execute(f"DELETE FROM {self.session_name}") self.conn.commit() def close(self): self.conn.close() def __del__(self): self.close() def delete_table(self): cursor = self.conn.cursor() try: cursor.execute(f"DROP TABLE IF EXISTS {self.session_name}") self.conn.commit() logger.info(f"Table '{self.session_name}' has been deleted.") except sqlite3.Error as e: logger.error(f"Error deleting table '{self.session_name}': {e}") finally: cursor.close() def get_latest_dataframe( self, decision: Literal["plot", "analyse", "query", "followup"] ) -> dict: contents = self.get(limit=1) if not contents: return {} _, content = contents[0] if decision == "plot" and content.plot_input_dataframe: return content.plot_input_dataframe elif decision == "analyse" and content.analysis_input_dataframe: return content.analysis_input_dataframe elif decision in ("query", "followup") and content.sql_output_dataframe: return content.sql_output_dataframe return {} ================================================ FILE: premsql/agents/models.py ================================================ from datetime import datetime from typing import Dict, Literal, Optional import pandas as pd from pydantic import BaseModel, Field from premsql.logger import setup_console_logger logger = setup_console_logger("[BASE-MODELS]") class BaseWorkerOutput(BaseModel): """Base model for worker 
outputs with common fields.""" question: str error_from_model: Optional[str] = None additional_input: Optional[Dict] = Field( default=None, description="Additional input data" ) class Text2SQLWorkerOutput(BaseWorkerOutput): """Output model for Text2SQL worker.""" db_connection_uri: str sql_string: str sql_reasoning: Optional[str] = None input_dataframe: Optional[Dict] = None output_dataframe: Optional[Dict] = None def show_output_dataframe(self) -> pd.DataFrame: if self.output_dataframe: return pd.DataFrame( self.output_dataframe["data"], columns=self.output_dataframe["columns"] ) return pd.DataFrame() class AnalyserWorkerOutput(BaseWorkerOutput): """Output model for Analyser worker.""" analysis: str input_dataframe: Optional[Dict] = None analysis_reasoning: Optional[str] = None class ChartPlotWorkerOutput(BaseWorkerOutput): """Output model for ChartPlot worker.""" input_dataframe: Optional[Dict] = None plot_config: Optional[Dict] = None image_plot: Optional[str] = None plot_reasoning: Optional[str] = None output_dataframe: Optional[Dict] = None class RouterWorkerOutput(BaseWorkerOutput): """Output model for Router worker.""" route_to: Literal["followup", "plot", "analyse", "query"] input_dataframe: Optional[Dict] = None decision_reasoning: Optional[str] = None # This is a more of a custom worker class FollowupWorkerOutput(BaseWorkerOutput): """Output model for Followup worker.""" route_taken: Literal["followup", "plot", "analyse", "query"] suggestion: str alternative_route: Optional[Literal["followup", "plot", "analyse", "query"]] = None class ExitWorkerOutput(BaseModel): """Output model for Exit worker, combining results from all workers.""" session_name: str question: str db_connection_uri: str route_taken: Literal["plot", "analyse", "query", "followup"] # Text2SQL fields sql_string: Optional[str] = None sql_reasoning: Optional[str] = None sql_input_dataframe: Optional[Dict] = None sql_output_dataframe: Optional[Dict] = None error_from_sql_worker: Optional[str] 
= None # Analysis worker fields analysis: Optional[str] = None analysis_reasoning: Optional[str] = None analysis_input_dataframe: Optional[Dict] = None error_from_analysis_worker: Optional[str] = None # Plot Worker fields plot_config: Optional[Dict] = None plot_input_dataframe: Optional[Dict] = None plot_output_dataframe: Optional[Dict] = None image_to_plot: Optional[str] = None plot_reasoning: Optional[str] = None error_from_plot_worker: Optional[str] = None # Followup Worker fields followup_route_to_take: Optional[ Literal["plot", "analyse", "query", "followup"] ] = None followup_suggestion: Optional[str] = None error_from_followup_worker: Optional[str] = None # Additional input additional_input: Optional[Dict] = Field( default=None, description="Additional input data" ) def show_output_dataframe( self, ) -> pd.DataFrame: dataframe = None if self.route_taken == "query": dataframe = self.sql_output_dataframe elif self.route_taken == "plot": dataframe = self.plot_output_dataframe elif self.route_taken == "analyse": dataframe = self.analysis_input_dataframe if dataframe: return pd.DataFrame(dataframe["data"], columns=dataframe["columns"]) return pd.DataFrame() class AgentOutput(BaseModel): """Final output model for the entire pipeline.""" session_name: str question: str db_connection_uri: str route_taken: Literal["plot", "analyse", "query", "followup"] input_dataframe: Optional[Dict] = None output_dataframe: Optional[Dict] = None sql_string: Optional[str] = None analysis: Optional[str] = None reasoning: Optional[str] = None plot_config: Optional[Dict] = None image_to_plot: Optional[str] = None followup_route: Optional[Literal["plot", "analyse", "query", "followup"]] = None followup_suggestion: Optional[str] = None error_from_pipeline: Optional[str] = None created_at: datetime = Field(default_factory=datetime.now) def show_output_dataframe( self, ) -> pd.DataFrame: dataframe = None if self.route_taken == "query": dataframe = self.sql_output_dataframe elif 
self.route_taken == "plot": dataframe = self.plot_output_dataframe elif self.route_taken == "analyse": dataframe = self.analysis_input_dataframe if dataframe: return pd.DataFrame(dataframe["data"], columns=dataframe["columns"]) return pd.DataFrame() ================================================ FILE: premsql/agents/router.py ================================================ from typing import Optional import pandas as pd from premsql.logger import setup_console_logger from premsql.agents.base import RouterWorkerBase, RouterWorkerOutput from premsql.agents.utils import convert_df_to_dict logger = setup_console_logger("[BASELINE-ROUTER]") class SimpleRouterWorker(RouterWorkerBase): def run( self, question: str, input_dataframe: Optional[pd.DataFrame] ) -> RouterWorkerOutput: if question.startswith("/query"): route_to = "query" elif question.startswith("/analyse"): route_to = "analyse" elif question.startswith("/plot"): route_to = "plot" else: route_to = "followup" logger.info(f"Routing to: {route_to}") question = ( question.split(f"/{route_to}")[1] if route_to != "followup" else question ) return RouterWorkerOutput( question=question, route_to=route_to, input_dataframe=( convert_df_to_dict(df=input_dataframe) if input_dataframe else None ), decision_reasoning="Simple routing based on question prefix", additional_input={}, error_from_model=None, ) ================================================ FILE: premsql/agents/tools/__init__.py ================================================ from premsql.agents.tools.plot.matplotlib_tool import SimpleMatplotlibTool __all__ = ["SimpleMatplotlibTool"] ================================================ FILE: premsql/agents/tools/plot/base.py ================================================ import base64 import io from abc import ABC, abstractmethod import pandas as pd from PIL import Image class BasePlotTool(ABC): @abstractmethod def run(self, data: pd.DataFrame, plot_config: dict): raise NotImplementedError() @abstractmethod def 
convert_plot_to_image(self, fig): raise NotImplementedError def convert_image_to_base64(self, image: Image.Image) -> str: buffered = io.BytesIO() image.save(buffered, format="PNG") return base64.b64encode(buffered.getvalue()).decode() def save_image(self, image: Image.Image, file_path: str, format: str = "PNG"): image.save(file_path, format=format) def plot_from_base64(self, output_base64: str): image_data = base64.b64decode(output_base64) return Image.open(io.BytesIO(image_data)) ================================================ FILE: premsql/agents/tools/plot/matplotlib_tool.py ================================================ import io from typing import Callable, Dict import matplotlib.pyplot as plt import pandas as pd from matplotlib.axes import Axes from matplotlib.figure import Figure from PIL import Image from premsql.logger import setup_console_logger from premsql.agents.tools.plot.base import BasePlotTool logger = setup_console_logger("[MATPLOTLIB-TOOL]") class SimpleMatplotlibTool(BasePlotTool): def __init__(self): self.plot_functions: Dict[ str, Callable[[pd.DataFrame, str, str, Axes], None] ] = { "area": self._area_plot, "bar": self._bar_plot, "scatter": self._scatter_plot, "histogram": self._histogram_plot, "line": self._line_plot, } def run(self, data: pd.DataFrame, plot_config: Dict[str, str]) -> Figure: try: self._validate_config(data, plot_config) plot_type = plot_config["plot_type"] x = plot_config["x"] y = plot_config["y"] fig, ax = plt.subplots(figsize=(10, 6)) self.plot_functions[plot_type](data, x, y, ax) plt.title(f"{plot_type.capitalize()} Plot: {x} vs {y}") plt.xlabel(x) plt.ylabel(y) plt.tight_layout() return fig except Exception as e: logger.error(f"Error creating plot: {str(e)}") return plt.figure() # Return an empty figure on error def _validate_config(self, df: pd.DataFrame, plot_config: Dict[str, str]) -> None: required_keys = ["plot_type", "x", "y"] missing_keys = [key for key in required_keys if key not in plot_config] if 
missing_keys: raise ValueError( f"Missing required keys in plot_config: {', '.join(missing_keys)}" ) if plot_config["x"] not in df.columns: raise ValueError(f"Column '{plot_config['x']}' not found in DataFrame") if plot_config["y"] not in df.columns: raise ValueError(f"Column '{plot_config['y']}' not found in DataFrame") if plot_config["plot_type"] not in self.plot_functions: raise ValueError(f"Unsupported plot type: {plot_config['plot_type']}") def _area_plot(self, df: pd.DataFrame, x: str, y: str, ax: Axes) -> None: ax.fill_between(df[x], df[y]) def _bar_plot(self, df: pd.DataFrame, x: str, y: str, ax: Axes) -> None: ax.bar(df[x], df[y]) def _scatter_plot(self, df: pd.DataFrame, x: str, y: str, ax: Axes) -> None: ax.scatter(df[x], df[y]) def _histogram_plot(self, df: pd.DataFrame, x: str, y: str, ax: Axes) -> None: ax.hist(df[x], bins=20) def _line_plot(self, df: pd.DataFrame, x: str, y: str, ax: Axes) -> None: ax.plot(df[x], df[y]) def convert_plot_to_image(self, fig: Figure) -> Image.Image: buf = io.BytesIO() fig.savefig(buf, format="png") buf.seek(0) return Image.open(buf) ================================================ FILE: premsql/agents/utils.py ================================================ from typing import Any, Dict, Literal import pandas as pd from premsql.executors.from_langchain import SQLDatabase from premsql.logger import setup_console_logger from premsql.agents.models import AgentOutput, ExitWorkerOutput logger = setup_console_logger("[PIPELINE-UTILS]") def convert_df_to_dict(df: pd.DataFrame): return {"data": df.to_dict(), "columns": list(df.keys())} def execute_and_render_result( db: SQLDatabase, sql: str, using: Literal["dataframe", "json"] ): result = db.run_no_throw(command=sql, fetch="cursor") if isinstance(result, str): return _render_error(result, sql, using) return _render_data(result, sql, using) def _render_error(error: str, sql: str, using: str) -> Dict[str, Any]: to_show = {"sql_string": sql, "error_from_model": error, 
"dataframe": None} if using == "dataframe": to_show["dataframe"] = pd.DataFrame() # empty DataFrame elif using == "json": to_show["dataframe"] = {"data": {}, "columns": []} # empty JSON structure return to_show def _render_data(result, sql: str, using: str) -> Dict[str, Any]: table = pd.DataFrame(data=result.fetchall(), columns=result.keys()) if len(table) > 200: logger.info("Truncating output table to first 200 rows only") table = table.iloc[:200, :] if any(table.columns.duplicated()): logger.info(f"Found duplicate columns: {table.columns[table.columns.duplicated()].tolist()}") # Create unique column names by adding suffixes table.columns = [f"{col}_{i}" if i > 0 else col for i, col in enumerate(table.columns)] logger.info(f"Renamed columns to: {table.columns.tolist()}") to_show = {"sql_string": sql, "error_from_model": None, "dataframe": table} if using == "json": to_show["dataframe"] = {"columns": list(table.columns), "data": table.to_dict()} return to_show def convert_exit_output_to_agent_output(exit_output: ExitWorkerOutput) -> AgentOutput: return AgentOutput( session_name=exit_output.session_name, question=exit_output.question, db_connection_uri=exit_output.db_connection_uri, route_taken=exit_output.route_taken, input_dataframe=exit_output.sql_input_dataframe or exit_output.analysis_input_dataframe or exit_output.plot_input_dataframe, output_dataframe=exit_output.sql_output_dataframe or exit_output.plot_output_dataframe, sql_string=exit_output.sql_string, analysis=exit_output.analysis, reasoning=exit_output.sql_reasoning or exit_output.analysis_reasoning or exit_output.plot_reasoning, plot_config=exit_output.plot_config, image_to_plot=exit_output.image_to_plot, followup_route=exit_output.followup_route_to_take, followup_suggestion=exit_output.followup_suggestion, error_from_pipeline=( exit_output.error_from_sql_worker or exit_output.error_from_analysis_worker or exit_output.error_from_plot_worker or exit_output.error_from_followup_worker ), ) 
================================================ FILE: premsql/cli.py ================================================ import os import subprocess import sys from pathlib import Path import click @click.group() @click.version_option() def cli(): """PremSQL CLI to manage API servers and Streamlit app""" pass @cli.group() def launch(): """Launch PremSQL services""" pass @launch.command(name='all') def launch_all(): """Launch both API server and Streamlit app""" premsql_path = Path(__file__).parent.parent.absolute() env = os.environ.copy() env["PYTHONPATH"] = str(premsql_path) # Start API server manage_py_path = premsql_path / "premsql" / "playground" / "backend" / "manage.py" if not manage_py_path.exists(): click.echo(f"Error: manage.py not found at {manage_py_path}", err=True) sys.exit(1) # Run migrations first click.echo("Running database migrations...") try: subprocess.run([sys.executable, str(manage_py_path), "makemigrations"], env=env, check=True) subprocess.run([sys.executable, str(manage_py_path), "migrate"], env=env, check=True) except subprocess.CalledProcessError as e: click.echo(f"Error running migrations: {e}", err=True) sys.exit(1) click.echo("Starting the PremSQL backend API server...") subprocess.Popen([sys.executable, str(manage_py_path), "runserver"], env=env) # Launch the streamlit app click.echo("Starting the PremSQL Streamlit app...") main_py_path = premsql_path / "premsql" / "playground" / "frontend" / "main.py" if not main_py_path.exists(): click.echo(f"Error: main.py not found at {main_py_path}", err=True) sys.exit(1) cmd = [sys.executable, "-m", "streamlit", "run", str(main_py_path), "--server.maxUploadSize=500"] try: subprocess.run(cmd, env=env, check=True) except KeyboardInterrupt: click.echo("Stopping all services...") stop() @launch.command(name='api') def launch_api(): """Launch only the API server""" premsql_path = Path(__file__).parent.parent.absolute() env = os.environ.copy() env["PYTHONPATH"] = str(premsql_path) manage_py_path = 
premsql_path / "premsql" / "playground" / "backend" / "manage.py" if not manage_py_path.exists(): click.echo(f"Error: manage.py not found at {manage_py_path}", err=True) sys.exit(1) # Run makemigrations click.echo("Running database migrations...") try: subprocess.run([sys.executable, str(manage_py_path), "makemigrations"], env=env, check=True) subprocess.run([sys.executable, str(manage_py_path), "migrate"], env=env, check=True) except subprocess.CalledProcessError as e: click.echo(f"Error running migrations: {e}", err=True) sys.exit(1) click.echo("Starting the PremSQL backend API server...") cmd = [sys.executable, str(manage_py_path), "runserver"] try: subprocess.run(cmd, env=env, check=True) except KeyboardInterrupt: click.echo("API server stopped.") @cli.command() def stop(): """Stop all PremSQL services""" click.echo("Stopping all PremSQL services...") try: if sys.platform == "win32": subprocess.run( ["taskkill", "/F", "/IM", "python.exe", "/FI", "WINDOWTITLE eq premsql*"], check=True, ) else: subprocess.run(["pkill", "-f", "manage.py runserver"], check=True) subprocess.run(["pkill", "-f", "streamlit"], check=True) click.echo("All services stopped successfully.") except subprocess.CalledProcessError: click.echo("No running services found.") except Exception as e: click.echo(f"Error stopping services: {e}", err=True) sys.exit(1) if __name__ == "__main__": cli() ================================================ FILE: premsql/datasets/__init__.py ================================================ from pathlib import Path from typing import Optional, Union from premsql.datasets.base import StandardDataset, Text2SQLBaseDataset from premsql.datasets.real.bird import BirdDataset from premsql.datasets.real.domains import DomainsDataset from premsql.datasets.real.spider import SpiderUnifiedDataset from premsql.datasets.synthetic.gretel import GretelAIDataset from premsql.utils import get_accepted_filters class Text2SQLDataset: def __init__( self, dataset_name: str, split: 
str, dataset_folder: Optional[Union[str, Path]] = "./data", hf_token: Optional[str] = None, force_download: Optional[bool] = False, **kwargs ): assert dataset_name in ["bird", "domains", "spider", "gretel"], ValueError( "Dataset should be one of bird, domains, spider, gretel" ) dataset_mapping = { "bird": BirdDataset, "domains": DomainsDataset, "spider": SpiderUnifiedDataset, "gretel": GretelAIDataset, } self._text2sql_dataset: Text2SQLBaseDataset = dataset_mapping[dataset_name]( split=split, dataset_folder=dataset_folder, hf_token=hf_token, force_download=force_download, **kwargs ) @property def raw_dataset(self): return self._text2sql_dataset.dataset @property def filter_availables(self): return get_accepted_filters(data=self._text2sql_dataset.dataset) def setup_dataset( self, filter_by: tuple | None = None, num_rows: int | None = None, num_fewshot: int | None = None, model_name_or_path: str | None = None, prompt_template: str | None = None, tokenize: bool | None = False ): return self._text2sql_dataset.setup_dataset( filter_by=filter_by, num_rows=num_rows, model_name_or_path=model_name_or_path, tokenize=tokenize, prompt_template=prompt_template, num_fewshot=num_fewshot ) __all__ = [ "StandardDataset", "GretelAIDataset", "SpiderUnifiedDataset", "BirdDataset", "DomainsDataset", "Text2SQLDataset", ] ================================================ FILE: premsql/datasets/base.py ================================================ import json import os import sqlite3 from abc import ABC, abstractmethod from copy import deepcopy from pathlib import Path from typing import Optional, Sequence, Union from tqdm.auto import tqdm from premsql.logger import setup_console_logger from premsql.prompts import BASE_TEXT2SQL_PROMPT from premsql.utils import ( filter_options, get_accepted_filters, get_random_few_shot_prompts, tokenize_fn, ) logger = setup_console_logger(name="[DATASET]") try: import torch from transformers import AutoTokenizer except ImportError: logger.warn("Ensure 
transformers and torch. Install using: pip install torch transformers") IGNORE_INDEX = -100 class Text2SQLBaseInstance: def __init__(self, dataset: list[dict]) -> None: assert "question" in dataset[0], "question is required" assert "SQL" in dataset[0], "sql is required" assert "db_path" in dataset[0], "db_path is required" assert "db_id" in dataset[0], "db_id is required" self.dataset = dataset def __repr__(self) -> str: return str(json.dumps(self.dataset[:3], indent=4)) def __len__(self) -> int: return len(self.dataset) def __getitem__(self, idx: int) -> dict: return dict(**self.dataset[idx]) def schema_prompt(self, db_path: str) -> str: schemas = {} conn = sqlite3.connect(db_path) cursor = conn.cursor() cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") tables = cursor.fetchall() for table in tables: table_name = table[0] if table_name == "sqlite_sequence": continue cursor.execute( f"SELECT sql FROM sqlite_master WHERE type='table' AND name='{table_name}';" ) create_table_sql = cursor.fetchone() if create_table_sql: schemas[table_name] = create_table_sql[0] else: schemas[table_name] = "Schema does not exist" schema_prompt = "\n".join( schemas[table[0]] for table in tables if table[0] != "sqlite_sequence" ) return schema_prompt def additional_prompt(self, prompt: Optional[str] = None): return "" if prompt is None else f"# Additional Knowledge:\n{prompt}" def add_few_shot_examples(self, db_id: str, k: int = 3) -> str: assert k > 0, "k should be greater than 0" db_fewshot_prompts_map = get_random_few_shot_prompts( dataset=self.dataset, num_few_shot=k ) return db_fewshot_prompts_map[db_id] def apply_prompt( self, num_fewshot: Optional[int] = None, prompt_template: Optional[str] = None, ): prompt_template = ( BASE_TEXT2SQL_PROMPT if prompt_template is None else prompt_template ) for blob in tqdm(self.dataset, total=len(self.dataset), desc="Applying prompt"): few_shot_prompt = ( "" if num_fewshot is None else 
self.add_few_shot_examples(db_id=blob["db_id"], k=num_fewshot) ) final_prompt = prompt_template.format( schemas=self.schema_prompt(blob["db_path"]), additional_knowledge=( "" if "knowledge" not in blob else self.additional_prompt(blob["knowledge"]) ), few_shot_examples=few_shot_prompt, question=blob["question"], ) blob["prompt"] = final_prompt return self.dataset class SupervisedDatasetForTraining(torch.utils.data.Dataset): @classmethod def load_from_pth(cls, dataset_path: Union[str, Path]): dataset_path = str(dataset_path) dataset_dict = torch.load(dataset_path) assert "input_ids" in dataset_dict[0], "input_ids is required" assert "labels" in dataset_dict[0], "labels is required" assert "raw" in dataset_dict[0], "raw is required" return cls( dataset=dataset_dict, model_name_or_path=None, hf_token=None, ) def __init__( self, dataset: dict, model_name_or_path: Optional[str] = None, tokenize: Optional[bool] = False, hf_token: Optional[str] = None, ): assert "prompt" in dataset[0], "key prompt is required" assert "SQL" in dataset[0], "key SQL is required" self.is_tokenized = False if model_name_or_path is not None and tokenize: self.tokenizer = AutoTokenizer.from_pretrained( pretrained_model_name_or_path=model_name_or_path, padding_side="right", token=hf_token, ) self.dataset = dataset if self.tokenizer.chat_template: for content in self.dataset: content["prompt"] = self.tokenizer.apply_chat_template( [{"role": "user", "content": content["prompt"]}], tokenize=False ) logger.info("Casted dataset with model chat template") logger.info("Starting Tokenization ...") sources, targets = [], [] for example in self.dataset: sources.append(example["prompt"]) targets.append(f"{example['SQL']}{self.tokenizer.eos_token}") data_dict = self.preprocess(sources=sources, targets=targets) self.input_ids = data_dict["input_ids"] self.labels = data_dict["labels"] self.is_tokenized = True elif "input_ids" in dataset[0] and "labels" in dataset[0]: self.dataset = dataset self.input_ids = 
dataset["input_ids"] self.labels = dataset["labels"] self.is_tokenized = True elif model_name_or_path is not None and not tokenize: self.tokenizer = AutoTokenizer.from_pretrained( pretrained_model_name_or_path=model_name_or_path, padding_side="right", token=hf_token, ) self.dataset = dataset if self.tokenizer.chat_template: for content in self.dataset: content["prompt"] = self.tokenizer.apply_chat_template( [{"role": "user", "content": content["prompt"]}], tokenize=False ) logger.info("Casted dataset with model chat template") else: self.dataset = dataset def preprocess(self, sources: Sequence[str], targets: Sequence[str]): examples = [s + t for s, t in zip(sources, targets)] examples_tokenized, sources_tokenized = [ tokenize_fn(strings, self.tokenizer) for strings in (examples, sources) ] input_ids = examples_tokenized["input_ids"] labels = deepcopy(input_ids) for label, source_len in zip(labels, sources_tokenized["input_ids_lens"]): label[:source_len] = IGNORE_INDEX return dict(input_ids=input_ids, labels=labels) def __len__(self): return len(self.dataset) def __getitem__(self, idx: int): if self.is_tokenized: return dict( input_ids=self.input_ids[idx], labels=self.labels[idx], raw=dict(**self.dataset[idx]), ) else: return dict(**self.dataset[idx]) def save_tokenized_dataset(self, path_to_save: Union[str, Path]): torch.save(self.dataset, str(path_to_save)) logger.info("Dataset saved successfully in {}".format(path_to_save)) class Text2SQLBaseDataset(ABC): def __init__( self, split: str, dataset_path: Union[str, Path], database_folder_name: str, json_file_name: str, hf_token: Optional[str] = None, ): self.dataset_path = Path(dataset_path) self.database_folder_name = database_folder_name self.dataset = json.load(open(self.dataset_path / json_file_name, "r")) assert split in ["train", "validation", "test"], ValueError( "Split should be either train or validation" ) self.split = split self.hf_token = hf_token if hf_token else os.environ.get("HF_TOKEN", None) 
@property def raw_dataset(self): return self._text2sql_dataset.dataset @property def filter_availables(self): return get_accepted_filters(data=self.dataset) @abstractmethod def setup_dataset( self, filter_by: Optional[tuple] = None, num_rows: Optional[int] = None, num_fewshot: Optional[int] = None, model_name_or_path: Optional[str] = None, tokenize: Optional[bool] = False, prompt_template: Optional[str] = BASE_TEXT2SQL_PROMPT, ): for content in self.dataset: content["db_path"] = str( self.dataset_path / f"{self.database_folder_name}" / content["db_id"] / f"{content['db_id']}.sqlite" ) if filter_by: self.dataset = filter_options(data=self.dataset, filter_by=filter_by) if num_rows: self.dataset = self.dataset[:num_rows] self.dataset = Text2SQLBaseInstance(dataset=self.dataset).apply_prompt( num_fewshot=num_fewshot, prompt_template=prompt_template ) return SupervisedDatasetForTraining( dataset=self.dataset, model_name_or_path=model_name_or_path, hf_token=self.hf_token, tokenize=tokenize ) def __len__(self): return len(self.dataset) def __getitem__(self, idx): return dict(**self.dataset[idx]) class StandardDataset(Text2SQLBaseDataset): def __init__( self, split: str, dataset_path: Union[str, Path], database_folder_name: str, json_file_name: str, hf_token: Optional[str] = None, ): super().__init__( split=split, dataset_path=dataset_path, database_folder_name=database_folder_name, json_file_name=json_file_name, hf_token=hf_token, ) def setup_dataset( self, filter_by: tuple | None = None, num_rows: int | None = None, num_fewshot: int | None = None, model_name_or_path: str | None = None, prompt_template: str | None = None, tokenize: bool | None = False ): logger.info("Setting up Dataset") return super().setup_dataset( filter_by=filter_by, num_rows=num_rows, model_name_or_path=model_name_or_path, tokenize=tokenize, prompt_template=prompt_template, num_fewshot=num_fewshot ) ================================================ FILE: premsql/datasets/collator.py 
================================================ from dataclasses import dataclass from typing import Sequence from premsql.logger import setup_console_logger logger = setup_console_logger("[DATASET-COLLATOR]") try: import torch import transformers except ImportError: logger.warn("Ensure torch and transformers are installed.") logger.warn("Install them by: pip install torch transformers") @dataclass class DataCollatorForSupervisedDataset: tokenizer: "transformers.PreTrainedTokenizer" def __call__(self, instances: Sequence[dict]) -> dict[str, torch.Tensor]: input_ids, labels = tuple( [instance[key] for instance in instances] for key in ("input_ids", "labels") ) input_ids = torch.nn.utils.rnn.pad_sequence( input_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id, ) labels = torch.nn.utils.rnn.pad_sequence( labels, batch_first=True, padding_value=-100 ) return dict( input_ids=input_ids, labels=labels, attention_mask=input_ids.ne(self.tokenizer.pad_token_id), ) ================================================ FILE: premsql/datasets/error_dataset.py ================================================ import json from pathlib import Path from typing import Optional, Sequence from tqdm.auto import tqdm from premsql.datasets.base import ( SupervisedDatasetForTraining, Text2SQLBaseDataset, Text2SQLBaseInstance, ) from premsql.evaluator.base import BaseExecutor, Text2SQLEvaluator from premsql.generators.base import Text2SQLGeneratorBase from premsql.logger import setup_console_logger from premsql.prompts import ERROR_HANDLING_PROMPT logger = setup_console_logger("[ERROR-HANDLING-DATASET]") class ErrorDatasetInstance(Text2SQLBaseInstance): def __init__(self, dataset: list[dict]) -> None: super().__init__(dataset=dataset) def apply_prompt(self, prompt_template: Optional[str] = ERROR_HANDLING_PROMPT): data_to_return = [] for content in tqdm( self.dataset, total=len(self.dataset), desc="Applying error prompt" ): assert "error" in content, "key error is not present" 
error_msg = content["error"] if error_msg is not None: prompt = content["prompt"].split("# SQL:")[0].strip() prediction = content["generated"] error_prompt = prompt_template.format( existing_prompt=prompt, sql=prediction, error_msg=error_msg ) data_to_return.append( { "db_id": content["db_id"], "question": content["question"], "SQL": content["SQL"], "prompt": error_prompt, "db_path": content["db_path"], } ) return data_to_return class ErrorDatasetGenerator: @classmethod def from_existing( cls, experiment_name: str, experiment_folder: Optional[str] = None, tokenize_model_name_or_path: Optional[str] = None, hf_token: Optional[str] = None, ) -> dict: experiment_folder = Path("./experiments") or Path(experiment_folder) experiment_path = ( experiment_folder / "train" / experiment_name / "error_dataset.json" ) if not experiment_path.exists(): raise FileNotFoundError(f"Path {experiment_path} does not exists") dataset = json.load(open(experiment_path, "r")) return ( ErrorDatasetInstance(dataset=dataset) if not tokenize_model_name_or_path else SupervisedDatasetForTraining( dataset=dataset, model_name_or_path=tokenize_model_name_or_path, hf_token=hf_token, ) ) def __init__( self, generator: Text2SQLGeneratorBase, executor: BaseExecutor, ): self.generator = generator self.evaluator = Text2SQLEvaluator( executor=executor, experiment_path=self.generator.experiment_path ) def generate_and_save( self, datasets: Sequence[Text2SQLBaseDataset], path_to_save: Optional[str] = None, force: Optional[bool] = False, tokenize: Optional[bool] = False, prompt_template: Optional[str] = ERROR_HANDLING_PROMPT, hf_token: Optional[str] = None, ) -> None: path_to_save = ( (self.generator.experiment_path / "error_dataset.json") if path_to_save is None else Path(path_to_save) ) if path_to_save.exists() and force == False: logger.info("Error dataset already exists") with open(path_to_save, "r") as json_file: data_to_return = json.load(json_file) return data_to_return responses = 
self.generator.generate_and_save_results( dataset=datasets, temperature=0.1, max_new_tokens=256, force=force ) logger.info("Starting Evaluation") _ = self.evaluator.execute( metric_name="accuracy", model_responses=responses, ) del responses # Now iterate over the error dataset with open(self.generator.experiment_path / "predict.json", "r") as file: error_dataset = json.load(file) error_instances = ErrorDatasetInstance(dataset=error_dataset).apply_prompt( prompt_template=prompt_template ) with open(path_to_save, "w") as json_file: json.dump(error_instances, json_file, indent=4) return ( error_instances if not tokenize else SupervisedDatasetForTraining( dataset=error_instances, model_name_or_path=self.generator.model_name_or_path, hf_token=hf_token, ) ) ================================================ FILE: premsql/datasets/real/bird.py ================================================ from pathlib import Path from typing import Optional, Union from huggingface_hub import snapshot_download from premsql.datasets.base import Text2SQLBaseDataset from premsql.logger import setup_console_logger logger = setup_console_logger("[BIRD-DATASET]") class BirdDataset(Text2SQLBaseDataset): def __init__( self, split: str, dataset_folder: Optional[Union[str, Path]] = "./data", hf_token: Optional[str] = None, force_download: Optional[bool] = False, **kwargs ): dataset_folder = Path(dataset_folder) bird_folder = dataset_folder / "bird" if not bird_folder.exists() or force_download: bird_folder.mkdir(parents=True, exist_ok=True) # Download it from hf hub snapshot_download( repo_id="premai-io/birdbench", repo_type="dataset", local_dir=dataset_folder / "bird", force_download=force_download, ) dataset_path = bird_folder / split database_folder_name = kwargs.get("database_folder_name", None) or ( "train_databases" if split == "train" else "dev_databases" ) json_file_name = kwargs.get("json_file_name", None) or ( "train.json" if split == "train" else "validation.json" ) super().__init__( 
split=split, dataset_path=dataset_path, database_folder_name=database_folder_name, json_file_name=json_file_name, hf_token=hf_token, ) logger.info("Loaded Bird Dataset") def setup_dataset( self, filter_by: tuple | None = None, num_rows: int | None = None, num_fewshot: int | None = None, model_name_or_path: str | None = None, prompt_template: str | None = None, tokenize: bool | None = False ): logger.info("Setting up Bird Dataset") return super().setup_dataset( filter_by=filter_by, num_rows=num_rows, model_name_or_path=model_name_or_path, tokenize=tokenize, prompt_template=prompt_template, num_fewshot=num_fewshot ) ================================================ FILE: premsql/datasets/real/domains.py ================================================ from pathlib import Path from typing import Optional, Union from huggingface_hub import snapshot_download from premsql.datasets.base import Text2SQLBaseDataset from premsql.logger import setup_console_logger logger = setup_console_logger("[DOMAINS-DATASET]") class DomainsDataset(Text2SQLBaseDataset): def __init__( self, split: str, dataset_folder: Optional[Union[str, Path]] = "./data", hf_token: Optional[str] = None, force_download: Optional[bool] = False, ): dataset_folder = Path(dataset_folder) domains_folder = dataset_folder / "domains" if not domains_folder.exists() or force_download: domains_folder.mkdir(parents=True, exist_ok=True) # Download it from hf hub snapshot_download( repo_id="premai-io/domains", repo_type="dataset", local_dir=dataset_folder / "domains", force_download=force_download, ) assert split in ["train", "validation"], ValueError( "Split should be either train or validation" ) json_file_name = "train.json" if split == "train" else "validation.json" super().__init__( split=split, dataset_path=domains_folder, database_folder_name="databases", json_file_name=json_file_name, hf_token=hf_token, ) logger.info("Loaded Domains Dataset") # An extra step for Domains Dataset so that it can be # compatible with 
the Base dataset and Base instance for content in self.dataset: content["SQL"] = content["query"] def setup_dataset( self, filter_by: tuple | None = None, num_rows: int | None = None, num_fewshot: int | None = None, model_name_or_path: str | None = None, prompt_template: str | None = None, tokenize: bool | None = False ): logger.info("Setting up Domains Dataset") return super().setup_dataset( filter_by=filter_by, num_rows=num_rows, model_name_or_path=model_name_or_path, tokenize=tokenize, prompt_template=prompt_template, num_fewshot=num_fewshot ) ================================================ FILE: premsql/datasets/real/spider.py ================================================ from pathlib import Path from typing import Optional, Union from huggingface_hub import snapshot_download from premsql.datasets.base import Text2SQLBaseDataset from premsql.logger import setup_console_logger logger = setup_console_logger("[SPIDER-DATASET]") class SpiderUnifiedDataset(Text2SQLBaseDataset): def __init__( self, split: str, dataset_folder: Optional[Union[str, Path]] = "./data", hf_token: Optional[str] = None, force_download: Optional[bool] = False, ): dataset_folder = Path(dataset_folder) spider_folder = dataset_folder / "spider" if not spider_folder.exists() or force_download: spider_folder.mkdir(parents=True, exist_ok=True) # Download it from hf hub snapshot_download( repo_id="premai-io/spider", repo_type="dataset", local_dir=dataset_folder / "spider", force_download=force_download, ) assert split in ["train", "validation"], ValueError( "Split should be either train or validation" ) json_file_name = "train.json" if split == "train" else "validation.json" super().__init__( split=split, dataset_path=spider_folder, database_folder_name="database", json_file_name=json_file_name, hf_token=hf_token, ) logger.info("Loaded Spider Dataset") # An extra step for Spider Dataset so that it can be # compatible with the Base dataset and Base instance for content in self.dataset: 
content["SQL"] = content["query"] def setup_dataset( self, filter_by: tuple | None = None, num_rows: int | None = None, num_fewshot: int | None = None, model_name_or_path: str | None = None, prompt_template: str | None = None, tokenize: bool | None = False ): logger.info("Setting up Spider Dataset") return super().setup_dataset( filter_by=filter_by, num_rows=num_rows, model_name_or_path=model_name_or_path, tokenize=tokenize, prompt_template=prompt_template, num_fewshot=num_fewshot ) ================================================ FILE: premsql/datasets/synthetic/gretel.py ================================================ from pathlib import Path from typing import Optional, Union from datasets import load_dataset from tqdm.auto import tqdm from premsql.datasets.base import ( SupervisedDatasetForTraining, Text2SQLBaseDataset, Text2SQLBaseInstance, ) from premsql.logger import setup_console_logger from premsql.prompts import BASE_TEXT2SQL_PROMPT from premsql.utils import filter_options, save_to_json logger = setup_console_logger("[GRETELAI-DATASET]") class GretelAIInstance(Text2SQLBaseInstance): def __init__(self, dataset: list[dict]) -> None: super().__init__(dataset) def apply_prompt( self, num_fewshot: Optional[int] = None, prompt_template: Optional[str] = BASE_TEXT2SQL_PROMPT, ): prompt_template = ( BASE_TEXT2SQL_PROMPT if prompt_template is None else prompt_template ) for blob in tqdm(self.dataset, total=len(self.dataset), desc="Applying prompt"): few_shot_prompt = ( "" if num_fewshot is None else self.add_few_shot_examples(db_id=blob["db_id"], k=num_fewshot) ) final_prompt = prompt_template.format( schemas=blob["context"], additional_knowledge="", few_shot_examples=few_shot_prompt, question=blob["question"], ) blob["prompt"] = final_prompt return self.dataset class GretelAIDataset(Text2SQLBaseDataset): def __init__( self, split: Optional[str] = "train", dataset_folder: Optional[Union[str, Path]] = "./data", hf_token: Optional[str] = None, force_download: 
Optional[bool] = False, ): dataset_folder = Path(dataset_folder) dataset_path = dataset_folder / "gretel" if not dataset_path.exists() or force_download: dataset_path.mkdir(parents=True, exist_ok=True) dataset = [] raw_dataset = load_dataset("gretelai/synthetic_text_to_sql", token=hf_token) for split in ["train", "test"]: for content in raw_dataset[split]: blob_content = { "id": content["id"], "question": content["sql_prompt"], "schema": content["sql_context"], "SQL": content["sql"], "context": content["sql_context"], "task_type": content["sql_task_type"], "complexity": content["sql_complexity"], "db_id": content["domain"], "db_path": None, } dataset.append(blob_content) save_to_json(save_path=dataset_path / "train.json", json_object=dataset) super().__init__( split="train", dataset_path=dataset_path, database_folder_name=None, json_file_name="train.json", ) def setup_dataset( self, filter_by: Optional[tuple] = None, num_rows: Optional[int] = None, num_fewshot: Optional[int] = None, model_name_or_path: Optional[str] = None, prompt_template: Optional[str] = BASE_TEXT2SQL_PROMPT, ): if filter_by: self.dataset = filter_options(data=self.dataset, filter_by=filter_by) if num_rows: self.dataset = self.dataset[:num_rows] self.dataset = GretelAIInstance(dataset=self.dataset).apply_prompt( num_fewshot=num_fewshot, prompt_template=prompt_template ) return SupervisedDatasetForTraining( dataset=self.dataset, model_name_or_path=model_name_or_path, hf_token=self.hf_token, ) ================================================ FILE: premsql/evaluator/README.md ================================================ ## Evaluators premsql evaluators help you to evaluate your text-to-sql models on various validation datasets. Currently, we support two metrics for evaluation: - Execution Accuracy - Valid Efficiency Score **Execution Accuracy (EX):** From the name, it is clear that the correctness of the LLM is measured by comparing the executed results from the LLM with the ground truth. 
**Valid Efficiency Score (VES):** The primary objective of LLM-generated SQL queries is to be accurate. However, it also needs to be performance-optimized when dealing with big data. This metric assesses both of the objectives. It quantifies how efficient the query is and whether the query is accurate or not. The figure below shows how it is computed.

Here is a quick start on how to use evaluators with premsql:

```python
import json
from pathlib import Path

from premsql.datasets import Text2SQLDataset
from premsql.generators.premai import Text2SQLGeneratorPremAI
from premsql.evaluator import Text2SQLEvaluator, SQLiteExecutor

# Get the validation dataset
dataset = Text2SQLDataset(
    dataset_name="bird",
    split="test",
    database_folder_name="test_databases",
    json_file_name="test.json",
    dataset_folder="/root/anindya/Submission/text2sql/data",
).setup_dataset(
    num_rows=10,
    num_fewshot=3,
)

generator = Text2SQLGeneratorPremAI(
    model_name="gpt-4o",
    project_id=1234,
    premai_api_key="FK-xxxx-xxx-xxx",
    experiment_name="test_generators",
    device="cuda:0",
    type="test"
)

executor = SQLiteExecutor()

# Generate the model responses to evaluate
responses = generator.generate_and_save_results(dataset=dataset)

evaluator = Text2SQLEvaluator(
    executor=executor,
    experiment_path=generator.experiment_path
)

# Calculate Execution Accuracy
ex = evaluator.execute(
    metric_name="accuracy",
    model_responses=responses,
    filter_by="difficulty"
)

# Similarly, calculate the Valid Efficiency Score
ves = evaluator.execute(
    metric_name="ves",
    model_responses=responses,
    filter_by="difficulty"
)
```

**Output**

Here is the output of execution accuracy of different models.
``` Accuracy: --------- +-------------+-------------------+-------------------+ | Category | num_correct (%) | total questions | +=============+===================+===================+ | simple | 58.4865 | 925 | +-------------+-------------------+-------------------+ | moderate | 43.75 | 464 | +-------------+-------------------+-------------------+ | challenging | 42.7586 | 145 | +-------------+-------------------+-------------------+ | overall | 52.5424 | 1534 | +-------------+-------------------+-------------------+ Valid Efficiency Score (VES): ----------------------------- +-------------+-----------+-------------------+ | Category | VES (%) | total questions | +=============+===========+===================+ | simple | 60.1844 | 925 | +-------------+-----------+-------------------+ | moderate | 46.4345 | 464 | +-------------+-----------+-------------------+ | challenging | 43.9845 | 145 | +-------------+-----------+-------------------+ | overall | 54.4941 | 1534 | +-------------+-----------+-------------------+ ``` We have also benchmarked several closed and open-source models. Here are some results for the following models: - gpt-4o - gpt-4o-mini - claude-3.5-sonnet - codellama-70b-instruct - claude-3-opus - llama-3.1-405-instruct **Accuracy** ![accuracy comparison](/assets/Model-Accuracy-Comparison.png) **Valid Efficiency Score** ![ves comparison](/assets/Models-VES-Comparison.png) We have also made a detailed blog about this. If you are more interested in the analysis, you can check out the [blog post here](https://blog.premai.io/text2sql-eval). 
================================================ FILE: premsql/evaluator/__init__.py ================================================ from premsql.evaluator.base import Text2SQLEvaluator __all__ = ["Text2SQLEvaluator"] ================================================ FILE: premsql/evaluator/base.py ================================================ import math import traceback from pathlib import Path from typing import Optional, Union from func_timeout import FunctionTimedOut, func_timeout from tqdm.auto import tqdm from premsql.executors.base import BaseExecutor from premsql.utils import save_to_json class Text2SQLEvaluator: def __init__( self, executor: BaseExecutor, experiment_path: Union[str, Path] ) -> None: self.executor = executor self.experiment_path = Path(experiment_path) def _execute_model( self, metric_name: str, generated_sql: str, gold_sql: str, dsn_or_db_path: str, meta_time_out: Optional[int] = 1000, num_iterations: Optional[int] = None, debug: Optional[bool] = False, ): assert metric_name in ["accuracy", "ves"], "Invalid metric name" try: if metric_name == "accuracy": result = func_timeout( meta_time_out, self.executor.match_sqls, args=(generated_sql, gold_sql, dsn_or_db_path), ) elif metric_name == "ves": num_iterations = 10 if num_iterations is None else num_iterations result = func_timeout( meta_time_out, self.executor.iterated_execution, args=(generated_sql, gold_sql, dsn_or_db_path, num_iterations), ) else: raise ValueError(f"Invalid metric name: {metric_name}") return { metric_name: result["result"], "error": result["error"], } except FunctionTimedOut as e: return { metric_name: 0, "error": f"Function Timed out: {e}", } except Exception as e: if debug: traceback.print_exc() return { metric_name: 0, "error": f"Exception: {e}", } def execute( self, metric_name: str, model_responses: list[dict], filter_by: Optional[str] = None, num_iterations: Optional[int] = 10, meta_time_out: Optional[int] = 10, # change it later to 1000 debug: Optional[bool] = 
False, ) -> dict: data_with_results = [] for response in tqdm(model_responses, total=len(model_responses)): result = self._execute_model( metric_name=metric_name, generated_sql=response["generated"], gold_sql=response["SQL"], dsn_or_db_path=response["db_path"], num_iterations=num_iterations, meta_time_out=meta_time_out, debug=debug, ) data_with_results.append({**response, **result}) execution_result = {} if filter_by: if filter_by not in data_with_results[0]: raise KeyError(f"Filter key: {filter_by} is not found in responses") filter_values = {response[filter_by] for response in data_with_results} total_responses = len(data_with_results) overall_metric = 0.0 for value in filter_values: filtered_responses = [ response for response in data_with_results if response[filter_by] == value ] metric_value = self.compute_metric( results=filtered_responses, metric_name=metric_name ) execution_result[value] = metric_value overall_metric += ( metric_value * len(filtered_responses) / total_responses ) execution_result["overall"] = overall_metric else: execution_result["overall"] = self.compute_metric( results=data_with_results, metric_name=metric_name ) save_to_json( json_object=execution_result, save_path=self.experiment_path / f"{metric_name}.json", ) # also save the data_with_results save_to_json( json_object=data_with_results, save_path=self.experiment_path / "predict.json", ) return execution_result def compute_metric(self, results: list[dict], metric_name: str) -> float: if metric_name == "accuracy": return sum(res["accuracy"] for res in results) / len(results) * 100 elif metric_name == "ves": num_queries = len(results) total_ratio = 0.0 for result in results: total_ratio += math.sqrt(result["ves"]) * 100 ves = total_ratio / num_queries return ves else: raise ValueError(f"Invalid metric name: {metric_name}") ================================================ FILE: premsql/executors/__init__.py ================================================ from 
premsql.executors.from_langchain import ExecutorUsingLangChain from premsql.executors.from_sqlite import SQLiteExecutor, OptimizedSQLiteExecutor __all__ = ["ExecutorUsingLangChain", "SQLiteExecutor", "OptimizedSQLiteExecutor"] ================================================ FILE: premsql/executors/base.py ================================================ from abc import ABC, abstractmethod import numpy as np class BaseExecutor(ABC): @abstractmethod def execute_sql(self, sql: str, dsn_or_db_path: str) -> dict: return {"result": None, "execution_time": None, "error": None} def match_sqls( self, predicted_sql: str, gold_sql: str, dsn_or_db_path: str ) -> bool: prediction = self.execute_sql(sql=predicted_sql, dsn_or_db_path=dsn_or_db_path) gold = self.execute_sql(sql=gold_sql, dsn_or_db_path=dsn_or_db_path) if prediction["error"]: return { "result": 0, "error": prediction["error"], } is_match = set(prediction["result"]) == set(gold["result"]) return { "result": int(is_match), "error": None if is_match else "Table mismatch", } def clean_abnormal(self, input: list[float]) -> list[float]: input_array = np.asarray(input) mean = np.mean(input_array) std = np.std(input_array) return [x for x in input_array if mean - 3 * std < x < mean + 3 * std] def iterated_execution( self, predicted_sql: str, gold_sql: str, dsn_or_db_path: str, num_iterations: int, ) -> dict: is_match = self.match_sqls( predicted_sql=predicted_sql, gold_sql=gold_sql, dsn_or_db_path=dsn_or_db_path, ) if is_match["result"] == 1: diff_list = [ self.execute_sql(sql=gold_sql, dsn_or_db_path=dsn_or_db_path)[ "execution_time" ] / self.execute_sql(sql=gold_sql, dsn_or_db_path=dsn_or_db_path)[ "execution_time" ] for _ in range(num_iterations) ] processed_diff_list = self.clean_abnormal(diff_list) return { "result": sum(processed_diff_list) / len(processed_diff_list), "error": None, } return {"result": 0, "error": is_match["error"]} ================================================ FILE: 
premsql/executors/from_langchain.py ================================================ import time from typing import Union from langchain_community.utilities.sql_database import SQLDatabase from premsql.executors.base import BaseExecutor from premsql.utils import convert_sqlite_path_to_dsn class ExecutorUsingLangChain(BaseExecutor): def execute_sql(self, sql: str, dsn_or_db_path: Union[str, SQLDatabase]) -> dict: if isinstance(dsn_or_db_path, str): if dsn_or_db_path.endswith("sqlite"): dsn_or_db_path = convert_sqlite_path_to_dsn(path=dsn_or_db_path) db = SQLDatabase.from_uri(dsn_or_db_path) else: db = dsn_or_db_path start_time = time.time() response = db.run_no_throw(sql) end_time = time.time() error = response if response.startswith("Error") else None return { "result": None if error else response, "error": error, "execution_time": end_time - start_time, } ================================================ FILE: premsql/executors/from_sqlite.py ================================================ import sqlite3 import time from contextlib import contextmanager from typing import Any, Dict, Generator from premsql.executors.base import BaseExecutor from premsql.logger import setup_console_logger class OptimizedSQLiteExecutor(BaseExecutor): def __init__(self, timeout: float = 1000.0) -> None: self.timeout = timeout self.logger = setup_console_logger(name="[OPTIMIZED-SQLite-EXEC]") @contextmanager def get_connection(self, db_path: str) -> Generator[sqlite3.Connection, None, None]: if db_path.startswith("sqlite:///"): db_path = db_path.split("sqlite:///")[1] conn = sqlite3.connect(db_path, timeout=self.timeout) conn.execute("PRAGMA journal_mode = WAL") conn.execute("PRAGMA synchronous = NORMAL") conn.execute("PRAGMA cache_size = -64000") # 64MB cache conn.execute("PRAGMA temp_store = MEMORY") conn.row_factory = sqlite3.Row try: yield conn finally: conn.close() def execute_sql(self, sql: str, dsn_or_db_path: str) -> Dict[str, Any]: start_time = time.time() try: with 
self.get_connection(dsn_or_db_path) as conn: cursor = conn.cursor() cursor.execute("EXPLAIN QUERY PLAN " + sql) query_plan = cursor.fetchall() if any("SCAN TABLE" in str(row) for row in query_plan): self.logger.warn("Warning: Full table scan detected. Consider adding an index.") cursor.execute(sql) result = [dict(row) for row in cursor.fetchall()] error = None except sqlite3.Error as e: result = None error = str(e) finally: end_time = time.time() return { "result": result, "error": error, "execution_time": end_time - start_time, } def match_sqls(self, predicted_sql: str, gold_sql: str, dsn_or_db_path: str) -> Dict[str, Any]: with self.get_connection(dsn_or_db_path) as conn: prediction = self.execute_sql(predicted_sql, dsn_or_db_path) gold = self.execute_sql(gold_sql, dsn_or_db_path) if prediction["error"]: return {"result": 0, "error": prediction["error"]} is_match = set(map(tuple, prediction["result"])) == set(map(tuple, gold["result"])) return { "result": int(is_match), "error": None if is_match else "Table mismatch", } def iterated_execution(self, predicted_sql: str, gold_sql: str, dsn_or_db_path: str, num_iterations: int) -> Dict[str, Any]: is_match = self.match_sqls(predicted_sql, gold_sql, dsn_or_db_path) if is_match["result"] == 1: with self.get_connection(dsn_or_db_path) as conn: diff_list = [] for _ in range(num_iterations): gold_time = self.execute_sql(gold_sql, dsn_or_db_path)["execution_time"] predicted_time = self.execute_sql(predicted_sql, dsn_or_db_path)["execution_time"] diff_list.append(predicted_time / gold_time if gold_time > 0 else float('inf')) processed_diff_list = self.clean_abnormal(diff_list) return { "result": sum(processed_diff_list) / len(processed_diff_list) if processed_diff_list else 0, "error": None, } return {"result": 0, "error": is_match["error"]} class SQLiteExecutor(BaseExecutor): def execute_sql(self, sql: str, dsn_or_db_path: str) -> dict: if dsn_or_db_path.startswith("sqlite:///"): dsn_or_db_path = 
class Text2SQLGeneratorBase(ABC):
    """Abstract base for all Text2SQL generators.

    Subclasses supply a model client/tokenizer and implement ``generate``;
    this base handles experiment-folder bookkeeping, SQL post-processing,
    execution-guided decoding, and prediction caching on disk.
    """

    def __init__(
        self, experiment_name: str, type: str, experiment_folder: Optional[str] = None
    ):
        # Predictions are cached under <experiment_folder>/<type>/<experiment_name>;
        # defaults to the per-user cache dir when no folder is supplied.
        self.experiment_folder = (
            Path(experiment_folder)
            if experiment_folder is not None
            else Path(user_cache_dir()) / "premsql" / "experiments"
        )
        self.experiment_path = Path(self.experiment_folder) / type / experiment_name
        if not self.experiment_path.exists():
            self.experiment_path.mkdir(parents=True, exist_ok=True)
            logger.info(f"Created new experiment folder: {self.experiment_path}")
        else:
            logger.info(f"Experiment folder found in: {self.experiment_path}")
        # Properties evaluated once and cached on the instance.
        self.client = self.load_client
        self.tokenizer = self.load_tokenizer

    @property
    @abstractmethod
    def load_client(self):
        """Return the underlying model client (evaluated once in __init__)."""
        # Fix: original did `return NotImplementedError` (returning the
        # exception class) instead of raising it.
        raise NotImplementedError

    @property
    @abstractmethod
    def load_tokenizer(self):
        """Return the tokenizer; may be None for API-backed generators."""
        raise NotImplementedError

    @property
    @abstractmethod
    def model_name_or_path(self):
        """Identifier of the underlying model."""
        pass

    @abstractmethod
    def generate(
        self,
        data_blob: dict,
        temperature: Optional[float] = 0.0,
        max_new_tokens: Optional[int] = 256,
        postprocess: Optional[bool] = True,
        **kwargs,
    ) -> str:
        """Generate SQL for ``data_blob['prompt']``."""
        raise NotImplementedError

    def execution_guided_decoding(
        self,
        data_blob: dict,
        executor: BaseExecutor,
        temperature: Optional[float] = 0.0,
        max_new_tokens: Optional[int] = 256,
        max_retries: Optional[int] = 5,
        postprocess: Optional[bool] = True,
        **kwargs,
    ):
        """Generate SQL, retrying up to ``max_retries`` while execution fails.

        After the first failure the prompt is rewritten once with the error
        message (ERROR_HANDLING_PROMPT) so the model can self-correct.
        Returns the last generated SQL even if it still errors.
        """
        error_already_found = False
        for _ in range(max_retries):
            sql = self.generate(
                data_blob=data_blob,
                temperature=temperature,
                max_new_tokens=max_new_tokens,
                postprocess=postprocess,
                **kwargs,
            )
            error = executor.execute_sql(
                sql=sql, dsn_or_db_path=data_blob["db_path"]
            )["error"]
            if not error:
                return sql
            if not error_already_found:
                prompt = data_blob["prompt"].split("# SQL:")[0].strip()
                error_prompt = ERROR_HANDLING_PROMPT.format(
                    existing_prompt=prompt, sql=sql, error_msg=error
                )
                data_blob["prompt"] = error_prompt
                error_already_found = True
        return sql

    def postprocess(self, output_string: str):
        """Extract the SQL statement from raw model output and pretty-format it.

        Scans for the first SQL-opening keyword and drops any preamble the
        model emitted before it; falls back to the whole string.
        """
        sql_start_keywords = [
            r"\bSELECT\b",
            r"\bINSERT\b",
            r"\bUPDATE\b",
            r"\bDELETE\b",
            r"\bWITH\b",
        ]
        sql_start_pattern = re.compile("|".join(sql_start_keywords), re.IGNORECASE)
        match = sql_start_pattern.search(output_string)
        if match:
            sql_statement = output_string[match.start():]
        else:
            sql_statement = output_string
        return sqlparse.format(sql_statement.split("# SQL:")[-1].strip())

    def load_results_from_folder(self):
        """Return cached predictions from predict.json, or None if absent."""
        # Fix: check the file directly instead of listing the directory
        # (iterdir() would raise if the folder were missing), and close the
        # file handle with a context manager.
        predict_path = self.experiment_path / "predict.json"
        if predict_path.exists():
            with open(predict_path, "r") as fp:
                return json.load(fp)
        return None

    def generate_and_save_results(
        self,
        dataset: list[dict],
        temperature: Optional[float] = 0.0,
        max_new_tokens: Optional[int] = 256,
        force: Optional[bool] = False,
        postprocess: Optional[bool] = False,
        executor: Optional[BaseExecutor] = None,
        max_retries: Optional[int] = 5,
        **kwargs,
    ) -> dict:
        """Generate SQL for every blob in ``dataset`` and cache to predict.json.

        Returns cached results unless ``force`` is True. When ``executor``
        is given, uses execution-guided decoding per item.
        """
        existing_response = self.load_results_from_folder()
        # Fix: `force == False` -> `not force`.
        if existing_response is not None and not force:
            logger.info("Already results found")
            return existing_response
        to_dump = []
        for content in tqdm(dataset, total=len(dataset), desc="Generating result ..."):
            sql = (
                self.execution_guided_decoding(
                    data_blob=content,
                    executor=executor,
                    temperature=temperature,
                    postprocess=postprocess,
                    max_new_tokens=max_new_tokens,
                    max_retries=max_retries,
                    **kwargs,
                )
                if executor is not None
                else self.generate(
                    data_blob=content,
                    temperature=temperature,
                    max_new_tokens=max_new_tokens,
                    postprocess=postprocess,
                    **kwargs,
                )
            )
            to_dump.append({**content, "generated": sql})
        # Fix: context manager so the output file is flushed and closed.
        with open(self.experiment_path / "predict.json", "w") as fp:
            json.dump(to_dump, fp, indent=4)
        logger.info(f"All responses are written to: {self.experiment_path}")
        return to_dump
class Text2SQLGeneratorHF(Text2SQLGeneratorBase):
    """Text2SQL generator backed by a HuggingFace transformers causal LM."""

    def __init__(
        self,
        model_or_name_or_path: Union[str, "transformers.PreTrainedModel"],
        experiment_name: str,
        type: str,
        experiment_folder: Optional[str] = None,
        hf_token: Optional[str] = None,
        device: Optional[str] = None,
        **kwargs,
    ):
        # Fix: prefer an explicitly passed token over the environment,
        # consistent with the OpenAI / PremAI generators in this package
        # (previously HF_TOKEN silently overrode the argument).
        self.hf_api_key = hf_token or os.environ.get("HF_TOKEN")
        self._kwargs = kwargs
        self.device = (
            device
            if device is not None
            else ("cuda:0" if torch.cuda.is_available() else "cpu")
        )
        self.model_or_name_or_path = model_or_name_or_path
        super().__init__(
            experiment_name=experiment_name,
            experiment_folder=experiment_folder,
            type=type,
        )

    @property
    def load_client(self) -> "transformers.PreTrainedModel":
        """Load the model from the hub, or pass through a pre-built model."""
        if isinstance(self.model_or_name_or_path, str):
            return transformers.AutoModelForCausalLM.from_pretrained(
                pretrained_model_name_or_path=self.model_or_name_or_path,
                token=self.hf_api_key,
                **{
                    "device_map": self.device,
                    "torch_dtype": torch.float16,
                    **self._kwargs,
                },
            )
        return self.model_or_name_or_path

    @property
    def load_tokenizer(self) -> "transformers.PreTrainedTokenizer":
        """Load the tokenizer matching the loaded model's checkpoint."""
        tokenizer = transformers.AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path=self.client.config.name_or_path,
            token=self.hf_api_key,
            padding_side="right",
        )
        tokenizer.pad_token = tokenizer.eos_token
        return tokenizer

    @property
    def model_name_or_path(self):
        return self.model_or_name_or_path

    def generate(
        self,
        data_blob: dict,
        temperature: Optional[float] = 0.0,
        max_new_tokens: Optional[int] = 256,
        postprocess: Optional[bool] = True,
        **kwargs,
    ) -> str:
        """Greedy/sampled generation for data_blob['prompt']; returns SQL text."""
        prompt = data_blob["prompt"]
        input_ids = self.tokenizer.encode(
            text=prompt,
            return_tensors="pt",
            padding="longest",
            max_length=self.tokenizer.model_max_length,
            truncation=False,
        ).to(self.device)
        # temperature == 0.0 means deterministic (greedy) decoding.
        do_sample = temperature != 0.0
        generation_config = transformers.GenerationConfig(
            **{**kwargs, "temperature": temperature, "max_new_tokens": max_new_tokens}
        )
        output_tokens = (
            self.client.generate(
                input_ids=input_ids,
                do_sample=do_sample,
                generation_config=generation_config,
                pad_token_id=self.tokenizer.eos_token_id,
            )
            .detach()
            .tolist()[0]
        )
        # Strip the echoed prompt tokens from the decoded output.
        output_tokens = (
            output_tokens[len(input_ids[0]):]
            if len(output_tokens) > len(input_ids[0])
            else output_tokens
        )
        generated = self.tokenizer.decode(output_tokens, skip_special_tokens=True)
        return self.postprocess(output_string=generated) if postprocess else generated
class Text2SQLGeneratorMLX(Text2SQLGeneratorBase):
    """Text2SQL generator backed by an Apple MLX model."""

    def __init__(
        self,
        model_name_or_path: str,
        experiment_name: str,
        type: str,
        experiment_folder: Optional[str] = None,
        hf_token: Optional[str] = None,
        **kwargs,
    ):
        # Fix: prefer an explicitly passed token over the environment,
        # consistent with the OpenAI / PremAI generators in this package.
        self.hf_api_key = hf_token or os.environ.get("HF_TOKEN")
        self._kwargs = kwargs
        self.mlx_model_name_or_path = model_name_or_path
        super().__init__(
            experiment_name=experiment_name,
            experiment_folder=experiment_folder,
            type=type,
        )

    @property
    def load_client(self):
        """Resolve the model path and load the MLX model weights."""
        model_path = get_model_path(self.model_name_or_path)
        model = load_model(model_path, **self._kwargs)
        return model

    @property
    def load_tokenizer(self):
        """Load the tokenizer from the same resolved model path."""
        model_path = get_model_path(self.model_name_or_path)
        return load_tokenizer(model_path, **self._kwargs)

    @property
    def model_name_or_path(self):
        return self.mlx_model_name_or_path

    def generate(
        self,
        data_blob: dict,
        temperature: Optional[float] = 0.0,
        max_new_tokens: Optional[int] = 256,
        postprocess: Optional[bool] = True,
        **kwargs,
    ) -> str:
        """Run mlx_lm.generate on data_blob['prompt'] and return the SQL."""
        prompt = data_blob["prompt"]
        # mlx_lm spells the sampling temperature "temp".
        generation_args = {"temp": temperature, **kwargs}
        output = generate(
            model=self.client,
            tokenizer=self.tokenizer,
            prompt=prompt,
            max_tokens=max_new_tokens,
            **generation_args,
        )
        return self.postprocess(output) if postprocess else output
class Text2SQLGeneratorOllama(Text2SQLGeneratorBase):
    """Text2SQL generator that talks to a locally running Ollama server."""

    def __init__(
        self,
        model_name: str,
        experiment_name: str,
        type: str,
        experiment_folder: Optional[str] = None,
        **kwargs,
    ):
        self._kwargs = kwargs
        self.model_name = model_name
        super().__init__(
            experiment_name=experiment_name,
            experiment_folder=experiment_folder,
            type=type,
        )

    @property
    def load_client(self):
        """Build the Ollama client (cached by the base class as self.client)."""
        return Client(host='http://localhost:11434')

    @property
    def load_tokenizer(self):
        # Tokenisation is handled server-side by Ollama.
        pass

    @property
    def model_name_or_path(self):
        return self.model_name

    def generate(
        self,
        data_blob: dict,
        temperature: Optional[float] = 0.0,
        max_new_tokens: Optional[int] = 256,
        postprocess: Optional[bool] = True,
        **kwargs,
    ) -> str:
        """Chat-complete data_blob['prompt'] via Ollama and return the SQL."""
        prompt = data_blob["prompt"]
        # Fix: reuse the client built once in __init__ (self.client) instead
        # of calling the load_client property, which constructed a brand-new
        # Client on every generate() call.
        response = self.client.chat(
            model=self.model_name_or_path,
            messages=[{"role": "user", "content": prompt}],
            options=dict(
                temperature=temperature,
                num_ctx=2048 + max_new_tokens,
            ),
        )["message"]["content"]
        return self.postprocess(output_string=response) if postprocess else response
class Text2SQLGeneratorOpenAI(Text2SQLGeneratorBase):
    """Text2SQL generator that delegates completion to the OpenAI chat API."""

    def __init__(
        self,
        model_name: str,
        experiment_name: str,
        type: str,
        experiment_folder: Optional[str] = None,
        openai_api_key: Optional[str] = None,
    ):
        # Explicit key wins; otherwise fall back to the environment.
        self._api_key = openai_api_key or os.environ.get("OPENAI_API_KEY")
        self.model_name = model_name
        super().__init__(
            experiment_folder=experiment_folder,
            experiment_name=experiment_name,
            type=type,
        )

    @property
    def load_client(self):
        """Construct the OpenAI client (cached once by the base class)."""
        return OpenAI(api_key=self._api_key)

    @property
    def load_tokenizer(self):
        # No local tokenizer: tokenisation happens on the API side.
        pass

    @property
    def model_name_or_path(self):
        return self.model_name

    def generate(
        self,
        data_blob: dict,
        temperature: Optional[float] = 0.0,
        max_new_tokens: Optional[int] = 256,
        postprocess: Optional[bool] = True,
        **kwargs,
    ) -> str:
        """Run one chat completion for data_blob['prompt'] and return the SQL."""
        prompt = data_blob["prompt"]
        # Caller kwargs are passed through, but temperature/max_tokens
        # from the signature always take precedence.
        request_config = dict(kwargs)
        request_config["temperature"] = temperature
        request_config["max_tokens"] = max_new_tokens
        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
            **request_config,
        )
        completion = response.choices[0].message.content
        if postprocess:
            return self.postprocess(output_string=completion)
        return completion
def setup_console_logger(name, level=logging.INFO):
    """Return a console logger for *name*, configuring it at most once.

    Fix: the original attached a fresh StreamHandler on every call, so
    calling it twice with the same name (as many premsql modules do at
    import time) duplicated every log line. We now only add a handler if
    the named logger does not have one yet.

    Args:
        name: logger name, as for logging.getLogger.
        level: logging level applied to the logger (default INFO).

    Returns:
        The configured logging.Logger instance.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)
    if not logger.handlers:
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)
    return logger
class ApiConfig(AppConfig):
    """Django app config for the playground backend's `api` app."""

    # BigAutoField is the default primary-key type for models in this app.
    default_auto_field = "django.db.models.BigAutoField"
    name = "api"


# Generated by Django 5.1.2 on 2024-10-26 09:06
# NOTE: auto-generated migration — do not hand-edit; create a follow-up
# migration for any schema change.

class Migration(migrations.Migration):

    initial = True

    dependencies = []

    operations = [
        # Mirrors api.models.Session.
        migrations.CreateModel(
            name="Session",
            fields=[
                ("session_id", models.AutoField(primary_key=True, serialize=False)),
                ("db_connection_uri", models.URLField()),
                ("session_name", models.CharField(max_length=255, unique=True)),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("base_url", models.URLField()),
                ("session_db_path", models.CharField(max_length=255)),
            ],
            options={
                "ordering": ["created_at"],
            },
        ),
        # Mirrors api.models.Completions (FK cascade-deletes with the session).
        migrations.CreateModel(
            name="Completions",
            fields=[
                ("chat_id", models.AutoField(primary_key=True, serialize=False)),
                ("message_id", models.IntegerField(blank=True, null=True)),
                ("session_name", models.CharField(max_length=255)),
                ("created_at", models.DateTimeField()),
                ("question", models.TextField(blank=True, null=True)),
                (
                    "session",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="messages",
                        to="api.session",
                    ),
                ),
            ],
            options={
                "verbose_name_plural": "Completions",
                "ordering": ["-created_at"],
            },
        ),
    ]
class Session(models.Model):
    """One playground session, bound to a running inference server."""

    session_id = models.AutoField(primary_key=True)
    # Connection URI of the database this session queries.
    db_connection_uri = models.URLField()
    session_name = models.CharField(max_length=255, unique=True)
    created_at = models.DateTimeField(auto_now_add=True)
    # Address of the inference server backing this session
    # (services.py prepends "http://", so presumably stored as host:port — verify).
    base_url = models.URLField()
    # Filesystem path of the agent-memory database for this session.
    session_db_path = models.CharField(max_length=255)

    class Meta:
        ordering = ["created_at"]


class Completions(models.Model):
    """One chat completion (question) belonging to a session."""

    chat_id = models.AutoField(primary_key=True)
    # Message id assigned by the inference server; may be absent.
    message_id = models.IntegerField(blank=True, null=True)
    session = models.ForeignKey(
        Session, on_delete=models.CASCADE, related_name="messages"
    )
    # Denormalized copy of the session name (used for bulk delete by name).
    session_name = models.CharField(max_length=255)
    # Set from the inference server's response, hence no auto_now_add.
    created_at = models.DateTimeField()
    question = models.TextField(blank=True, null=True)

    class Meta:
        ordering = ["-created_at"]
        verbose_name_plural = "Completions"
# All the Session Models
class SessionCreationRequest(BaseModel):
    """Request body for creating a session from an inference-server URL."""

    base_url: str = Field(...)
    model_config = ConfigDict(extra="forbid")


class SessionCreationResponse(BaseModel):
    """Outcome of session creation; optional fields are only set on success."""

    status_code: Literal[200, 500] = Field(...)
    status: Literal["success", "error"] = Field(...)
    session_id: Optional[int] = None
    session_name: Optional[str] = None
    # Fix: these were annotated `str` with a default of None
    # (`str = Field(None)`); they are genuinely optional on error paths.
    db_connection_uri: Optional[str] = None
    session_db_path: Optional[str] = None
    created_at: Optional[datetime] = None
    error_message: Optional[str] = None


class SessionSummary(BaseModel):
    """Read-model of one Session row (built from ORM attributes)."""

    session_id: int
    session_name: str
    created_at: datetime
    base_url: str
    db_connection_uri: str
    session_db_path: str
    model_config = ConfigDict(from_attributes=True)


class SessionListResponse(BaseModel):
    """Paginated list of sessions."""

    status_code: Literal[200, 500]
    status: Literal["success", "error"]
    sessions: Optional[List[SessionSummary]] = None
    total_count: Optional[int] = None
    page: Optional[int] = None
    page_size: Optional[int] = None
    error_message: Optional[str] = None


class SessionDeleteResponse(BaseModel):
    """Outcome of deleting a session by name."""

    session_name: str
    status_code: Literal[200, 404, 500]
    status: Literal["success", "error"]
    error_message: Optional[str] = None


# All the chat message models
class CompletionCreationRequest(BaseModel):
    session_name: str
    question: str


class CompletionCreationResponse(BaseModel):
    """Outcome of one chat completion."""

    # Fix: CompletionService.completion returns 404 when the session is
    # missing; 404 must be a legal value or validation fails at runtime.
    status_code: Literal[200, 404, 500]
    status: Literal["success", "error"]
    message_id: Optional[int] = None
    session_name: Optional[str] = None
    created_at: Optional[datetime] = None
    message: Optional[AgentOutput] = None
    question: Optional[str] = None
    error_message: Optional[str] = None


class CompletionSummary(BaseModel):
    """Read-model of one Completions row."""

    message_id: int
    session_name: str
    base_url: str
    created_at: datetime
    question: Optional[str] = None
    model_config = ConfigDict(from_attributes=True)


class CompletionListResponse(BaseModel):
    """Paginated chat history for a session."""

    # Fix: chat_history returns 404 for unknown sessions and passes
    # page/page_size — accept them instead of silently dropping them.
    status_code: Literal[200, 404, 500]
    status: Literal["success", "error"]
    completions: Optional[List[CompletionSummary]] = None
    total_count: Optional[int] = None
    page: Optional[int] = None
    page_size: Optional[int] = None
    error_message: Optional[str] = None
class AgentOutputSerializer(serializers.Serializer):
    """DRF mirror of premsql.agents.models.AgentOutput for swagger docs."""

    session_name = serializers.CharField()
    question = serializers.CharField()
    db_connection_uri = serializers.CharField()
    # Which worker handled the question.
    route_taken = serializers.ChoiceField(
        choices=["plot", "analyse", "query", "followup"]
    )
    input_dataframe = serializers.DictField(allow_null=True)
    output_dataframe = serializers.DictField(allow_null=True)
    sql_string = serializers.CharField(allow_null=True)
    analysis = serializers.CharField(allow_null=True)
    reasoning = serializers.CharField(allow_null=True)
    plot_config = serializers.DictField(allow_null=True)
    # Base64-encoded image, presumably — verify against the plotter worker.
    image_to_plot = serializers.CharField(allow_null=True)
    followup_route = serializers.ChoiceField(
        choices=["plot", "analyse", "query", "followup"], allow_null=True
    )
    followup_suggestion = serializers.CharField(allow_null=True)
    error_from_pipeline = serializers.CharField(allow_null=True)


# Sessions
class SessionCreationRequestSerializer(serializers.Serializer):
    """Mirror of the SessionCreationRequest pydantic model."""

    base_url = serializers.CharField()


class SessionCreationResponseSerializer(serializers.Serializer):
    """Mirror of SessionCreationResponse; null fields on error paths."""

    status_code = serializers.ChoiceField(choices=[200, 500])
    status = serializers.ChoiceField(choices=["success", "error"])
    session_id = serializers.IntegerField(allow_null=True)
    session_name = serializers.CharField(allow_null=True)
    db_connection_uri = serializers.CharField(allow_null=True)
    session_db_path = serializers.CharField(allow_null=True)
    created_at = serializers.DateTimeField(allow_null=True)
    error_message = serializers.CharField(allow_null=True)


class SessionSummarySerializer(serializers.Serializer):
    """Mirror of the SessionSummary pydantic model."""

    session_id = serializers.IntegerField()
    session_name = serializers.CharField(max_length=255)
    created_at = serializers.DateTimeField()
    base_url = serializers.CharField()
    db_connection_uri = serializers.CharField()
    session_db_path = serializers.CharField()


class SessionListResponseSerializer(serializers.Serializer):
    """Mirror of SessionListResponse (paginated session list)."""

    status_code = serializers.ChoiceField(choices=[200, 500])
    status = serializers.ChoiceField(choices=["success", "error"])
    sessions = SessionSummarySerializer(many=True, allow_null=True)
    total_count = serializers.IntegerField(allow_null=True)
    page = serializers.IntegerField(allow_null=True)
    page_size = serializers.IntegerField(allow_null=True)
    error_message = serializers.CharField(allow_null=True)
class SessionDeletionResponse(serializers.Serializer):
    """DRF mirror of the SessionDeleteResponse pydantic model."""

    session_name = serializers.CharField(max_length=255)
    status_code = serializers.ChoiceField(choices=[200, 404, 500])
    status = serializers.ChoiceField(choices=["success", "error"])
    error_message = serializers.CharField(allow_null=True)


# Chats (Completions)
class CompletionCreationRequestSerializer(serializers.Serializer):
    """Mirror of the CompletionCreationRequest pydantic model."""

    session_name = serializers.CharField()
    question = serializers.CharField()


class CompletionCreationResponseSerializer(serializers.Serializer):
    """Mirror of CompletionCreationResponse; null fields on error paths."""

    status_code = serializers.ChoiceField(choices=[200, 500])
    status = serializers.ChoiceField(choices=["success", "error"])
    message_id = serializers.IntegerField(allow_null=True)
    session_name = serializers.CharField(allow_null=True)
    # Fix: was a duplicated assignment (`message = message = ...`).
    message = AgentOutputSerializer(allow_null=True)
    created_at = serializers.DateTimeField(allow_null=True)
    question = serializers.CharField(allow_null=True)
    error_message = serializers.CharField(allow_null=True)


class CompletionSummarySerializer(serializers.Serializer):
    """Mirror of the CompletionSummary pydantic model."""

    message_id = serializers.IntegerField()
    session_name = serializers.CharField()
    base_url = serializers.CharField()
    created_at = serializers.DateTimeField()
    question = serializers.CharField(allow_null=True)


class CompletionListResponseSerializer(serializers.Serializer):
    """Mirror of CompletionListResponse (paginated chat history)."""

    status_code = serializers.ChoiceField(choices=[200, 500])
    status = serializers.ChoiceField(choices=["success", "error"])
    completions = CompletionSummarySerializer(many=True, allow_null=True)
    total_count = serializers.IntegerField(allow_null=True)
    error_message = serializers.CharField(allow_null=True)


# Utility function for creating model serializers
def create_model_serializer(model_class):
    """Build a generic ModelSerializer exposing all fields of model_class."""

    class ModelSerializer(serializers.ModelSerializer):
        class Meta:
            model = model_class
            fields = "__all__"

    return ModelSerializer
class SessionManageService:
    """Create, fetch, list and delete playground sessions."""

    def __init__(self) -> None:
        self.client = InferenceServerAPIClient()

    def create_session(
        self, request: SessionCreationRequest
    ) -> SessionCreationResponse:
        """Register a session by querying the inference server at base_url."""
        response = self.client.get_session_info(base_url=request.base_url)
        if response.get("status") == 500:
            return SessionCreationResponse(
                status_code=500,
                status="error",
                error_message="Can not start session, internal server error. Try Again!",
            )
        try:
            session = Session.objects.create(
                session_name=response["session_name"],
                db_connection_uri=response["db_connection_uri"],
                created_at=response["created_at"],
                base_url=response["base_url"],
                session_db_path=response["session_db_path"],
            )
            logger.info(f"Successfully created session: {response['session_name']}")
            return SessionCreationResponse(
                status_code=200,
                status="success",
                session_id=session.session_id,
                session_name=session.session_name,
                db_connection_uri=response["db_connection_uri"],
                session_db_path=response["session_db_path"],
                created_at=session.created_at,
                error_message=None,
            )
        except Exception as e:
            return SessionCreationResponse(
                status_code=500,
                status="error",
                error_message=f"Can not start session. {e}",
            )

    def get_session(self, session_name: str) -> Optional[Session]:
        """Return the Session row by name, or None if it does not exist."""
        try:
            return Session.objects.get(session_name=session_name)
        except ObjectDoesNotExist:
            return None

    def list_session(self, page: int, page_size: int = 20) -> SessionListResponse:
        """Return one page of sessions, newest first."""
        try:
            sessions = Session.objects.all().order_by("-created_at")
            paginator = Paginator(sessions, page_size)
            page_obj = paginator.get_page(page)
            session_summaries = [
                SessionSummary(
                    session_id=session.session_id,
                    session_name=session.session_name,
                    created_at=session.created_at,
                    base_url=session.base_url,
                    db_connection_uri=session.db_connection_uri,
                    session_db_path=session.session_db_path,
                )
                for session in page_obj
            ]
            return SessionListResponse(
                status="success",
                status_code=200,
                sessions=session_summaries,
                # Fix: report the total number of sessions, not the size
                # of the current page.
                total_count=paginator.count,
                page=page,
                page_size=page_size,
            )
        except Exception as e:
            return SessionListResponse(
                status="error",
                status_code=500,
                # Fix: the model field is `sessions`; the original passed a
                # nonexistent `session_summaries` kwarg here.
                sessions=None,
                total_count=0,
                page=page,
                page_size=page_size,
                error_message=f"Error listing sessions: {e}",
            )

    def delete_session(self, session_name: str):
        """Delete a session, its chats, its agent memory, and stop its server."""
        try:
            with transaction.atomic():
                session = Session.objects.get(session_name=session_name)
                try:
                    # Fix: take the last colon-separated segment so a
                    # scheme-qualified base_url does not break port parsing.
                    running_port = int(session.base_url.rsplit(":", 1)[-1])
                    stop_server_on_port(port=running_port)
                except Exception:
                    # Best-effort: the session is still deleted even if the
                    # inference server could not be stopped.
                    logger.warning(
                        "process killing failed, please shut down inference server manually"
                    )

                # Proceed with deletion
                Completions.objects.filter(session_name=session_name).delete()
                session.delete()
                logger.info("Deleted all the chats")

                agent_memory = AgentInteractionMemory(
                    session_name=session_name, db_path=session.session_db_path
                )
                agent_memory.delete_table()
                logger.info("Deleted the session registered inside PremSQL Agent")

            return SessionDeleteResponse(
                session_name=session_name,
                status_code=200,
                status="success",
                error_message=None,
            )
        except Session.DoesNotExist:
            return SessionDeleteResponse(
                session_name=session_name,
                status_code=404,
                status="error",
                error_message="Session does not exist",
            )
        except Exception as e:
            return SessionDeleteResponse(
                session_name=session_name,
                status_code=500,
                status="error",
                # Fix: the original message claimed "Session does not exist"
                # for any unexpected failure.
                error_message=f"Failed to delete session: {e}",
            )
class CompletionService:
    """Create chat completions via the inference server and fetch history."""

    def __init__(self) -> None:
        self.client = InferenceServerAPIClient()

    def completion(
        self, request: CompletionCreationRequest
    ) -> CompletionCreationResponse:
        """Forward a question to the session's inference server and persist the chat."""
        try:
            session = Session.objects.get(session_name=request.session_name)
        except ObjectDoesNotExist:
            # NOTE(review): CompletionCreationResponse.status_code is declared
            # Literal[200, 500]; returning 404 here will fail pydantic
            # validation unless the model also allows 404 — confirm.
            return CompletionCreationResponse(
                status_code=404,
                status="error",
                session_name=request.session_name,
                error_message=f"Session '{request.session_name}' not found",
            )
        try:
            # Small Hack ;_)
            # base_url is stored without a scheme; prepend it before calling.
            base_url = session.base_url
            base_url = f"http://{base_url}"
            session_inference_response = self.client.post_completion(
                base_url=base_url, question=request.question
            )
        except Exception as e:
            logger.error(f"Unexpected error during completion: {str(e)}")
            return CompletionCreationResponse(
                status_code=500,
                status="error",
                session_name=session.session_name,
                error_message="An unexpected error occurred",
            )
        try:
            # Persist the chat locally; created_at comes from the inference
            # server's message payload, not from the local clock.
            chat = Completions.objects.create(
                session=session,
                session_name=session.session_name,
                question=request.question,
                message_id=session_inference_response.get("message_id"),
                created_at=session_inference_response.get("message").get("created_at"),
            )
            logger.info(
                f"Chat completion created successfully for session: {session.session_name}"
            )
            agent_output = AgentOutput(**session_inference_response.get("message"))
            return CompletionCreationResponse(
                status_code=200,
                status="success",
                message_id=chat.message_id,
                session_name=session.session_name,
                created_at=chat.created_at,
                question=chat.question,
                message=agent_output,
            )
        except Exception as e:
            # The inference call succeeded but the local save failed.
            logger.error(f"Error saving completion: {str(e)}")
            return CompletionCreationResponse(
                status_code=500,
                status="error",
                session_name=session.session_name,
                error_message=f"Completion successful, but failed to save: {e}",
            )

    def chat_history(
        self, session_name: str, page: int, page_size: int = 20
    ) -> CompletionListResponse:
        """Return one page of a session's completions, oldest first."""
        try:
            session = Session.objects.get(session_name=session_name)
        except ObjectDoesNotExist:
            # NOTE(review): page/page_size (and status_code=404) are not
            # declared on CompletionListResponse — extras are silently
            # ignored under pydantic defaults; confirm the model matches.
            return CompletionListResponse(
                status="error",
                status_code=404,
                completions=[],
                total_count=0,
                page=page,
                page_size=page_size,
                error_message=f"Session '{session_name}' not found",
            )
        try:
            completions = Completions.objects.filter(session=session).order_by(
                "created_at"
            )
            paginator = Paginator(completions, page_size)
            page_obj = paginator.get_page(page)
            completion_summaries = [
                CompletionSummary(
                    message_id=completion.message_id,
                    session_name=completion.session_name,
                    base_url=completion.session.base_url,
                    created_at=completion.created_at,
                    question=completion.question,
                )
                for completion in page_obj
            ]
            return CompletionListResponse(
                status="success",
                status_code=200,
                completions=completion_summaries,
                total_count=completions.count(),
                page=page,
                page_size=page_size,
            )
        except Exception as e:
            return CompletionListResponse(
                status="error",
                status_code=500,
                completions=[],
                total_count=0,
                page=page,
                page_size=page_size,
                error_message=f"Error fetching chat history: {str(e)}",
            )
# URL routes for the playground backend API.
#
# Fix: the session detail/delete and chat-history routes had empty path
# segments ("session//", "chat/history//") — the `<str:session_name>`
# converters were missing, yet the corresponding views take a
# `session_name` argument. Restored the converters.
urlpatterns = [
    path("session/list/", views.list_sessions, name="list_sessions"),
    path("session/create", views.create_session, name="create_session"),
    path("session/<str:session_name>/", views.get_session, name="get_session"),
    path("session/<str:session_name>", views.delete_session, name="delete_session"),
    # Chat urls
    path("chat/completion", views.create_completion, name="completion"),
    path(
        "chat/history/<str:session_name>/", views.get_chat_history, name="chat_history"
    ),
]
@swagger_auto_schema(
    method="post",
    request_body=SessionCreationRequestSerializer,
    responses={
        200: SessionCreationResponseSerializer,
        400: "Bad Request",
        500: SessionCreationResponseSerializer,
    },
)
@api_view(["POST"])
def create_session(request):
    """POST /session/create — validate the body and delegate to the service.

    The pydantic constructor enforces the request schema (extra="forbid");
    validation failures fall through to the generic 500 handler.
    """
    try:
        session_request = SessionCreationRequest(**request.data)
        response = SessionManageService().create_session(request=session_request)
        return Response(response.model_dump())
    except json.JSONDecodeError:
        return Response(
            {"status": "error", "error_message": "Invalid JSON"},
            status=status.HTTP_400_BAD_REQUEST,
        )
    except Exception as e:
        return Response(
            {"status": "error", "error_message": str(e)},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )


@swagger_auto_schema(
    method="get",
    manual_parameters=[
        openapi.Parameter(
            "session_name",
            openapi.IN_PATH,
            description="Name of the session",
            type=openapi.TYPE_STRING,
        ),
    ],
    responses={
        200: SessionSummarySerializer,
        400: "Bad Request",
        500: SessionSummarySerializer,
    },
)
@api_view(["GET"])
def get_session(request, session_name):
    """GET /session/<name>/ — return a one-item session list, or 404.

    NOTE(review): on the missing-session path the body says status_code=500
    while the HTTP status is 404 — confirm which one clients rely on.
    """
    session = SessionManageService().get_session(session_name=session_name)
    if session:
        session_summary = SessionSummary.model_validate(session)
        response = SessionListResponse(
            status="success",
            status_code=200,
            sessions=[session_summary.model_dump()],
            total_count=1,
            page=1,
            page_size=1,
        )
    else:
        response = SessionListResponse(
            status="error",
            status_code=500,
            error_message="The requested session does not exist.",
        )
    return Response(
        response.model_dump(),
        status=status.HTTP_200_OK if session else status.HTTP_404_NOT_FOUND,
    )
status.HTTP_404_NOT_FOUND, ) @swagger_auto_schema( method="get", manual_parameters=[ openapi.Parameter( "page", openapi.IN_QUERY, description="Page number", type=openapi.TYPE_INTEGER, default=1, ), openapi.Parameter( "page_size", openapi.IN_QUERY, description="Number of items per page", type=openapi.TYPE_INTEGER, default=20, ), ], responses={ 200: SessionListResponseSerializer, 400: "Bad Request", 500: SessionListResponseSerializer, }, ) @api_view(["GET"]) def list_sessions(request): page = int(request.query_params.get("page", 1)) page_size = int(request.query_params.get("page_size", 20)) response = SessionManageService().list_session(page=page, page_size=page_size) return Response(response.model_dump()) @swagger_auto_schema( method="delete", manual_parameters=[ openapi.Parameter( "session_name", openapi.IN_PATH, description="Name of the session to delete", type=openapi.TYPE_STRING, required=True, ), ], responses={ 200: openapi.Response( "Session deleted successfully", schema=openapi.Schema( type=openapi.TYPE_OBJECT, properties={ "status": openapi.Schema( type=openapi.TYPE_STRING, example="success" ), "message": openapi.Schema(type=openapi.TYPE_STRING), }, ), ), 404: "Not Found", 500: "Internal Server Error", }, ) @api_view(["DELETE"]) def delete_session(request, session_name): try: result = SessionManageService().delete_session(session_name=session_name) return Response(result.model_dump(), status=result.status_code) except Exception as e: return Response( {"status": "error", "error_message": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR, ) # Completion Views @swagger_auto_schema( method="post", request_body=CompletionCreationRequestSerializer, responses={ 200: CompletionCreationResponseSerializer, 400: "Bad Request", 404: "Not Found", 500: "Internal Server Error", }, ) @api_view(["POST"]) def create_completion(request): try: completion_request = CompletionCreationRequest(**request.data) response = CompletionService().completion(request=completion_request) 
return Response( response.model_dump(), status=( status.HTTP_200_OK if response.status == "success" else status.HTTP_500_INTERNAL_SERVER_ERROR ), ) except ValidationError as e: return Response( {"status": "error", "error_message": str(e)}, status=status.HTTP_400_BAD_REQUEST, ) except Exception as e: return Response( {"status": "error", "error_message": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR, ) @swagger_auto_schema( method="get", manual_parameters=[ openapi.Parameter( "session_name", openapi.IN_PATH, description="Name of the session", type=openapi.TYPE_STRING, required=True, ), openapi.Parameter( "page", openapi.IN_QUERY, description="Page number", type=openapi.TYPE_INTEGER, default=1, ), openapi.Parameter( "page_size", openapi.IN_QUERY, description="Number of items per page", type=openapi.TYPE_INTEGER, default=20, ), ], responses={ 200: CompletionListResponseSerializer, 400: "Bad Request", 404: "Not Found", 500: "Internal Server Error", }, ) @api_view(["GET"]) def get_chat_history(request, session_name): try: page = int(request.query_params.get("page", 1)) page_size = int(request.query_params.get("page_size", 20)) response = CompletionService().chat_history( session_name=session_name, page=page, page_size=page_size ) return Response( response.model_dump(), status=( status.HTTP_200_OK if response.status == "success" else status.HTTP_404_NOT_FOUND ), ) except ValueError: return Response( {"status": "error", "error_message": "Invalid page or page_size parameter"}, status=status.HTTP_400_BAD_REQUEST, ) except Exception as e: return Response( {"status": "error", "error_message": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR, ) ================================================ FILE: premsql/playground/backend/backend/__init__.py ================================================ ================================================ FILE: premsql/playground/backend/backend/asgi.py ================================================ """ ASGI config for backend 
project.

It exposes the ASGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/5.1/howto/deployment/asgi/
"""

import os

from django.core.asgi import get_asgi_application

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend.settings")

application = get_asgi_application()
================================================ FILE: premsql/playground/backend/backend/settings.py ================================================
"""
Django settings for backend project.

Generated by 'django-admin startproject' using Django 5.1.2.

For more information on this file, see
https://docs.djangoproject.com/en/5.1/topics/settings/

For the full list of settings and their values, see
https://docs.djangoproject.com/en/5.1/ref/settings/
"""

from pathlib import Path

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# NOTE(review): this key is committed to the repository; it must be rotated
# and moved to an environment variable before any non-local deployment.
SECRET_KEY = "django-insecure-v3#txach78pic91j!s=ia3w+h@58niky5ozim)j0+6r56m$pmj"

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = []

# Application definition

INSTALLED_APPS = [
    "django.contrib.admin",
    "django.contrib.auth",
    "django.contrib.contenttypes",
    "django.contrib.sessions",
    "django.contrib.messages",
    "django.contrib.staticfiles",
    "rest_framework",
    "drf_yasg",
    "api",
]

MIDDLEWARE = [
    "django.middleware.security.SecurityMiddleware",
    "django.contrib.sessions.middleware.SessionMiddleware",
    "django.middleware.common.CommonMiddleware",
    "django.middleware.csrf.CsrfViewMiddleware",
    "django.contrib.auth.middleware.AuthenticationMiddleware",
    "django.contrib.messages.middleware.MessageMiddleware",
    "django.middleware.clickjacking.XFrameOptionsMiddleware",
]

ROOT_URLCONF = "backend.urls"

TEMPLATES = [
    {
        "BACKEND": "django.template.backends.django.DjangoTemplates",
        "DIRS": [],
        "APP_DIRS": True,
        "OPTIONS": {
            "context_processors": [
                "django.template.context_processors.debug",
                "django.template.context_processors.request",
                "django.contrib.auth.context_processors.auth",
                "django.contrib.messages.context_processors.messages",
            ],
        },
    },
]

WSGI_APPLICATION = "backend.wsgi.application"

# Database
# https://docs.djangoproject.com/en/5.1/ref/settings/#databases

DATABASES = {
    "default": {
        "ENGINE": "django.db.backends.sqlite3",
        "NAME": BASE_DIR / "db.sqlite3",
    }
}

# Password validation
# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
    },
    {
        "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
    },
    {
        "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
    },
    {
        "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
    },
]

# Internationalization
# https://docs.djangoproject.com/en/5.1/topics/i18n/

LANGUAGE_CODE = "en-us"

TIME_ZONE = "UTC"

USE_I18N = True

USE_TZ = True

# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/5.1/howto/static-files/

STATIC_URL = "static/"

#
Default primary key field type # https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" ================================================ FILE: premsql/playground/backend/backend/urls.py ================================================ from django.contrib import admin from django.urls import include, path from drf_yasg import openapi from drf_yasg.views import get_schema_view from rest_framework import permissions schema_view = get_schema_view( openapi.Info( title="PremSQL API", default_version="v0.0.1", description="API which controls PremSQL pipelines and agents", contact=openapi.Contact(email="anindyadeep@premai.io"), license=openapi.License(name="MIT"), ), public=True, permission_classes=(permissions.AllowAny,), ) urlpatterns = [ path("admin/", admin.site.urls), path("api/", include("api.urls")), path( "swagger/", schema_view.without_ui(cache_timeout=0), name="schema-json" ), path( "swagger/", schema_view.with_ui("swagger", cache_timeout=0), name="schema-swagger-ui", ), path("redoc/", schema_view.with_ui("redoc", cache_timeout=0), name="schema-redoc"), ] ================================================ FILE: premsql/playground/backend/backend/wsgi.py ================================================ """ WSGI config for backend project. It exposes the WSGI callable as a module-level variable named ``application``. 
For more information on this file, see
https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/
"""

import os

from django.core.wsgi import get_wsgi_application

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend.settings")

application = get_wsgi_application()
================================================ FILE: premsql/playground/backend/backend_client.py ================================================
import requests

from premsql.logger import setup_console_logger
from premsql.playground.backend.api.pydantic_models import (
    SessionCreationResponse,
    SessionDeleteResponse,
    SessionListResponse,
    SessionCreationRequest,
    CompletionCreationRequest,
    CompletionCreationResponse,
    CompletionListResponse,
)

# Hard-coded local backend address; the Django dev server default.
BASE_URL = "http://127.0.0.1:8000/api"

logger = setup_console_logger("BACKEND-API-CLIENT")


class BackendAPIClient:
    """Thin HTTP client over the Django backend's /api routes.

    Every method returns a pydantic response model and never raises: network
    and parsing failures are converted into ``status="error"`` responses.
    """

    def __init__(self):
        self.base_url = BASE_URL
        self.headers = {
            'accept': 'application/json',
            'Content-Type': 'application/json',
        }

    def create_session(self, request: SessionCreationRequest) -> SessionCreationResponse:
        """POST /session/create with the serialized request model."""
        try:
            response = requests.post(
                f"{self.base_url}/session/create",
                json=request.model_dump(),
                headers=self.headers
            )
            response.raise_for_status()  # Raises an HTTPError for bad responses
            return SessionCreationResponse(**response.json())
        except requests.RequestException as e:
            # ``response`` is unbound if requests.post itself raised, hence the
            # 'response' in locals() guards.
            logger.error(f"Error creating session: {str(e)}")
            logger.error(f"Response content: {response.text if 'response' in locals() else 'No response'}")
            return SessionCreationResponse(
                status="error",
                status_code=response.status_code if 'response' in locals() and hasattr(response, 'status_code') else 500,
                error_message=f"Failed to create session: {str(e)}"
            )
        except ValueError as e:
            logger.error(f"Error parsing response: {str(e)}")
            logger.error(f"Response content: {response.text if 'response' in locals() else 'No response'}")
            return SessionCreationResponse(
                status="error",
                status_code=500,
                error_message=f"Failed to parse server response: {str(e)}"
            )
        # NOTE(review): the two handlers below duplicate the ones above for the
        # same exception types and are therefore unreachable dead code; they
        # should be deleted.
        except requests.RequestException as e:
            logger.error(f"Error creating session: {str(e)}")
            logger.error(f"Response content: {response.text}")
            return SessionCreationResponse(
                status="error",
                status_code=response.status_code if hasattr(response, 'status_code') else 500,
                error_message=f"Failed to create session: {str(e)}"
            )
        except ValueError as e:
            logger.error(f"Error parsing response: {str(e)}")
            logger.error(f"Response content: {response.text}")
            return SessionCreationResponse(
                status="error",
                status_code=500,
                error_message=f"Failed to parse server response: {str(e)}"
            )

    def list_sessions(self, page: int = 1, page_size: int = 20) -> SessionListResponse:
        """GET /session/list/ with pagination query params."""
        try:
            response = requests.get(
                f"{self.base_url}/session/list/",
                params={"page": page, "page_size": page_size},
                headers=self.headers
            )
            response.raise_for_status()
            return SessionListResponse(**response.json())
        except requests.RequestException as e:
            logger.error(f"Error listing sessions: {str(e)}")
            logger.error(f"Response content: {response.text if 'response' in locals() else 'No response'}")
            return SessionListResponse(
                status="error",
                status_code=response.status_code if 'response' in locals() and hasattr(response, 'status_code') else 500,
                error_message=f"Failed to list sessions: {str(e)}",
                sessions=[],
                total_count=0
            )
        except ValueError as e:
            logger.error(f"Error parsing response: {str(e)}")
            logger.error(f"Response content: {response.text if 'response' in locals() else 'No response'}")
            return SessionListResponse(
                status="error",
                status_code=500,
                error_message=f"Failed to parse server response: {str(e)}",
                sessions=[],
                total_count=0
            )

    def get_session(self, session_name: str) -> SessionListResponse:
        """GET /session/{name}/ — single-session lookup, list-shaped response."""
        try:
            response = requests.get(
                f"{self.base_url}/session/{session_name}/",
                headers=self.headers
            )
            response.raise_for_status()
            return SessionListResponse(**response.json())
        except requests.RequestException as e:
            logger.error(f"Error getting session: {str(e)}")
            logger.error(f"Response content: {response.text if 'response' in locals() else 'No response'}")
            return SessionListResponse(
                status="error",
                status_code=response.status_code if 'response' in locals() and hasattr(response, 'status_code') else 500,
                error_message=f"Failed to get session: {str(e)}",
                name="",
                created_at="",
                sessions=[]
            )
        except (ValueError, KeyError, IndexError) as e:
            logger.error(f"Error parsing response: {str(e)}")
            logger.error(f"Response content: {response.text if 'response' in locals() else 'No response'}")
            return SessionListResponse(
                status="error",
                status_code=500,
                error_message=f"Failed to parse server response: {str(e)}",
                name="",
                created_at="",
                sessions=[]
            )

    def delete_session(self, session_name: str) -> SessionDeleteResponse:
        """DELETE /session/{name} (no trailing slash, matching the URLconf)."""
        try:
            response = requests.delete(
                f"{self.base_url}/session/{session_name}",
                headers=self.headers
            )
            response.raise_for_status()
            return SessionDeleteResponse(**response.json())
        except requests.RequestException as e:
            logger.error(f"Error deleting session: {str(e)}")
            logger.error(f"Response content: {response.text if 'response' in locals() else 'No response'}")
            return SessionDeleteResponse(
                status="error",
                status_code=response.status_code if 'response' in locals() and hasattr(response, 'status_code') else 500,
                error_message=f"Failed to delete session: {str(e)}"
            )
        except ValueError as e:
            logger.error(f"Error parsing response: {str(e)}")
            logger.error(f"Response content: {response.text if 'response' in locals() else 'No response'}")
            return SessionDeleteResponse(
                status="error",
                status_code=500,
                error_message=f"Failed to parse server response: {str(e)}"
            )

    # Chats
    def create_completion(self, request: CompletionCreationRequest) -> CompletionCreationResponse:
        """POST /chat/completion — run one chat turn for a session."""
        try:
            response = requests.post(
                f"{self.base_url}/chat/completion",
                json=request.model_dump(),
                headers=self.headers
            )
            response.raise_for_status()
            return CompletionCreationResponse(**response.json())
        except requests.RequestException as e:
            logger.error(f"Error creating completion: {str(e)}")
            logger.error(f"Response content: {response.text if 'response' in locals() else 'No response'}")
            return CompletionCreationResponse(
                status="error",
                status_code=response.status_code if 'response' in locals() and hasattr(response, 'status_code') else 500,
                error_message=f"Failed to create completion: {str(e)}",
                completion=""
            )
        except ValueError as e:
            logger.error(f"Error parsing response: {str(e)}")
            logger.error(f"Response content: {response.text if 'response' in locals() else 'No response'}")
            return CompletionCreationResponse(
                status="error",
                status_code=500,
                error_message=f"Failed to parse server response: {str(e)}",
                completion=""
            )

    def get_chat_history(self, session_name: str, page: int = 1, page_size: int = 20) -> CompletionListResponse:
        """GET /chat/history/{name}/ with pagination query params."""
        try:
            response = requests.get(
                f"{self.base_url}/chat/history/{session_name}/",
                params={"page": page, "page_size": page_size},
                headers=self.headers
            )
            response.raise_for_status()
            return CompletionListResponse(**response.json())
        except requests.RequestException as e:
            # NOTE(review): unlike the other methods this always reports 500
            # instead of echoing response.status_code — confirm intentional.
            logger.error(f"Error getting chat history: {str(e)}")
            logger.error(f"Response content: {response.text if 'response' in locals() else 'No response'}")
            return CompletionListResponse(
                status="error",
                status_code=500,
                error_message=f"Failed to get chat history: {str(e)}",
                completions=[],
                total_count=0
            )
        except ValueError as e:
            logger.error(f"Error parsing response: {str(e)}")
            logger.error(f"Response content: {response.text if 'response' in locals() else 'No response'}")
            return CompletionListResponse(
                status="error",
                status_code=500,
                error_message=f"Failed to parse server response: {str(e)}",
                completions=[],
                total_count=0
            )
================================================ FILE: premsql/playground/backend/manage.py ================================================
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys


def main():
    """Run administrative tasks."""
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend.settings")
    try:
        from django.core.management import execute_from_command_line
def _streamlit_chat_output(self, message: AgentOutput | ExitWorkerOutput):
    """Render one assistant message into the Streamlit chat area.

    Renders, in order: the output dataframe (only when no plot is
    configured), the analysis text, the plot, a follow-up suggestion,
    a collapsible "Reasoning" section, and any pipeline error.
    """
    # Normalise so the rest of the method deals with a single shape.
    if isinstance(message, ExitWorkerOutput):
        message = convert_exit_output_to_agent_output(exit_output=message)
    if message.output_dataframe:
        try:
            # output_dataframe is a dict with "data" and "columns" keys —
            # rebuild a real DataFrame before displaying it.
            df = message.output_dataframe
            df = pd.DataFrame(df["data"], columns=df["columns"])
            # Only show the raw table when there is no plot to render.
            if message.plot_config is None:
                st.dataframe(df)
        except Exception as e:
            st.error(f"Error: {e}")
    if message.analysis:
        st.markdown(message.analysis)
    if message.plot_config:
        # Plots are drawn from the *input* dataframe, not the output one.
        df = message.input_dataframe
        if df:
            self.plotter.run(
                data=pd.DataFrame(df["data"], columns=df["columns"]),
                plot_config=message.plot_config
            )
    if message.followup_suggestion:
        st.warning(message.followup_suggestion)
    # Collapsible section exposing how the answer was produced.
    with st.expander(label="Reasoning"):
        if message.sql_string:
            st.code(message.sql_string)
        if message.reasoning:
            st.markdown(message.reasoning)
        if message.plot_config:
            st.json(message.plot_config)
    if message.error_from_pipeline:
        st.error(message.error_from_pipeline)
Restart Agent Server to start the session at: {base_url}") else: if prompt := st.chat_input("What is your question?"): with st.chat_message("user"): st.markdown(prompt) with st.chat_message("assistant"): with st.spinner("Thinking..."): response = self.backend_client.create_completion( CompletionCreationRequest( session_name=session_name, question=prompt ) ) if response.status_code == 200: self._streamlit_chat_output( message=history.get_by_message_id(message_id=response.message_id) ) else: st.error("Something went wrong. Try again") ================================================ FILE: premsql/playground/frontend/components/session.py ================================================ import streamlit as st from premsql.playground.backend.backend_client import BackendAPIClient from premsql.playground.backend.api.pydantic_models import SessionCreationRequest additional_link_markdown = """ Here are some quick links to get you started with Prem: - Head over to [Prem App](https://app.premai.io/projects/) to start building on Gen AI. 
def render_register_session(self):
    """Render the sidebar form that registers a new session against a
    running AgentServer base URL.

    Returns:
        The backend's session-creation response when the form was
        submitted, otherwise None.
    """
    with st.sidebar:
        st.sidebar.title("Register new Session")
        with st.form(
            key="session_creation",
            clear_on_submit=True,
            border=True,  # fix: border expects a bool, not 100
            enter_to_submit=False,
        ):
            base_url = st.text_input(
                label="base_url",
                placeholder="the base url in which AgentServer is running"
            )
            button = st.form_submit_button(label="Submit")
            if button:
                response = self.backend_client.create_session(
                    request=SessionCreationRequest(base_url=base_url)
                )
                if response.status_code == 500:
                    # fix: previously passed st.markdown(...)'s return value
                    # (not a string) as the toast body.
                    st.toast(body=response.error_message, icon="❌")
                else:
                    st.toast(body=f"Session: {response.session_name} created successfully", icon="🥳")
                return response
def _validate_config(self, df: pd.DataFrame, plot_config: Dict[str, str]) -> None:
    """Validate that *df* and *plot_config* are usable by `run`.

    Args:
        df: DataFrame to be plotted.
        plot_config: must contain string values for "plot_type", "x", "y".

    Raises:
        ValueError: missing config keys, unknown columns, or unsupported
            plot type.
        TypeError: df is not a DataFrame, or x/y entries are not strings.
        AttributeError: df lacks a 'columns' attribute.
    """
    required_keys = ["plot_type", "x", "y"]
    missing_keys = [key for key in required_keys if key not in plot_config]
    if missing_keys:
        raise ValueError(f"Missing required keys in plot_config: {', '.join(missing_keys)}")
    # All required keys are guaranteed present past this point, so only the
    # value types of x/y need checking (the old re-check for presence was
    # unreachable and has been removed).
    for key in ["x", "y"]:
        if not isinstance(plot_config[key], str):
            raise TypeError(f"plot_config['{key}'] should be a string, but got {type(plot_config[key])}")
    if not isinstance(df, pd.DataFrame):
        raise TypeError(f"Expected df to be a pandas DataFrame, but got {type(df)}")
    if not hasattr(df, 'columns'):
        raise AttributeError(f"df does not have a 'columns' attribute. Type: {type(df)}")
    if plot_config["x"] not in df.columns:
        raise ValueError(f"Column '{plot_config['x']}' not found in DataFrame. Available columns: {', '.join(df.columns)}")
    if plot_config["y"] not in df.columns:
        raise ValueError(f"Column '{plot_config['y']}' not found in DataFrame. Available columns: {', '.join(df.columns)}")
    if plot_config["plot_type"] not in self.plot_functions:
        raise ValueError(f"Unsupported plot type: {plot_config['plot_type']}. Supported types: {', '.join(self.plot_functions.keys())}")
which needs to connect for Text to SQL specialized_model1=text2sql_model, # This referes to the Text to SQL model specialized_model2=analyser_plotter_model, # This refers to any model other than Text to SQL executor=ExecutorUsingLangChain(), # Which DB executor to use auto_filter_tables=False, # Whether to filter tables before Text to SQL plot_tool=SimpleMatplotlibTool() # Matplotlib Tool which will be used by plotter worker ) agent_server = AgentServer(agent=baseline, port={port}) agent_server.launch() """ STARTER_CODE_FILE_MLX = """ from premsql.playground import AgentServer from premsql.agents import BaseLineAgent from premsql.generators import Text2SQLGeneratorMLX from premsql.executors import ExecutorUsingLangChain from premsql.agents.tools import SimpleMatplotlibTool text2sql_model = Text2SQLGeneratorMLX( model_name_or_path="premai-io/prem-1B-SQL", experiment_name="text2sql_model", type="test" ) analyser_plotter_model = Text2SQLGeneratorMLX( model_name_or_path="meta-llama/Llama-3.2-1B-Instruct", experiment_name="analyser_model", type="test", ) """ STARTER_CODE_FILE_OLLAMA = """ from premsql.playground import AgentServer from premsql.agents import BaseLineAgent from premsql.generators import Text2SQLGeneratorOllama from premsql.agents.tools import SimpleMatplotlibTool from premsql.executors import ExecutorUsingLangChain text2sql_model = Text2SQLGeneratorOllama( model_name="anindya/prem1b-sql-ollama-fp116", experiment_name="ollama", type="test" ) analyser_plotter_model = Text2SQLGeneratorOllama( model_name="llama3.2:1b", experiment_name="ollama", type="test" ) """ STARTER_CODE_FILE_HF = """ from premsql.playground import AgentServer from premsql.agents import BaseLineAgent from premsql.generators import Text2SQLGeneratorHF from premsql.executors import ExecutorUsingLangChain from premsql.agents.tools import SimpleMatplotlibTool text2sql_model = Text2SQLGeneratorHF( model_name_or_path="premai-io/prem-1B-SQL", experiment_name="text2sql_model", type="test" ) 
def render_starter_code(session_name, db_path):
    """Render copy-pasteable AgentServer starter code for every supported
    generator backend (MLX, HuggingFace, Ollama, PremAI, OpenAI).

    Args:
        session_name: session name substituted into the templates.
        db_path: SQLite database path substituted into the templates.
    """
    # (label, expanded, [(note_kind, note_text), ...], template) — one entry
    # per backend; replaces five copy-pasted expander sections.
    sections = [
        ("Start Locally with MLX", True,
         [("warning", "Ensure you have mlx installed and using inside mac device."),
          ("info", "Python PyPI: pip install mlx mlx-lm")],
         STARTER_CODE_FILE_MLX),
        ("Start Locally with HuggingFace", False,
         [("warning", "Ensure you have torch, transformers installed inside your device."),
          ("info", "Python PyPI: pip install torch transformers")],
         STARTER_CODE_FILE_HF),
        ("Start Locally with Ollama", False,
         [("warning", "Ensure you have ollama installed inside your device."),
          ("info", "Python PyPI: pip install ollama"),
          ("info", "Install Ollama: curl -fsSL https://ollama.com/install.sh | sh")],
         STARTER_CODE_FILE_OLLAMA),
        ("Start with PremAI", False, [], STARTER_CODE_FILE_PREMAI),
        ("Start with OpenAI", False, [], STARTER_CODE_FILE_OPENAI),
    ]
    for label, expanded, notes, template in sections:
        with st.expander(label=label, expanded=expanded):
            for note_kind, note_text in notes:
                (st.warning if note_kind == "warning" else st.info)(note_text)
            # A fresh random port per section, as before.
            code = (template + COMMON).format(
                session_name=session_name,
                db_path=db_path,
                port=random.choice(range(7000, 9000)),
            )
            st.code(code, language="python")
@staticmethod
def render_csv_upload_view() -> Tuple[Optional[str], Optional[Path]]:
    """Sidebar form to upload CSV files, migrate them into a per-session
    SQLite database, and show starter code for the new session.

    Mirrors `render_kaggle_view`'s structure and error handling.
    """
    session_name = None
    # Fix: previously only bound inside the try block, so a failed
    # migration raised NameError at the final `if` check below.
    sqlite_db_path = None
    with st.sidebar:
        with st.expander(label="Upload CSV Files"):
            with st.form(key="csv_upload", clear_on_submit=True):
                session_name = st.text_input(label="Enter session name")
                uploaded_files = st.file_uploader(
                    label="Upload CSV files",
                    type="csv",
                    accept_multiple_files=True
                )
                submit = st.form_submit_button(label="Submit")
                if submit:
                    if not session_name:
                        st.error("Please enter a session name")
                    if not uploaded_files:
                        st.error("Please upload at least one CSV file")
                    try:
                        with st.spinner(text="Processing CSV files"):
                            sqlite_db_path = migrate_local_csvs_to_sqlite(
                                uploaded_files=uploaded_files,
                                session_name=session_name
                            )
                        st.success("Files uploaded and processed successfully!")
                    except Exception as e:
                        st.error(f"Error processing files: {str(e)}")
    if session_name and sqlite_db_path:
        render_starter_code(
            session_name=session_name, db_path=sqlite_db_path
        )
{new_session_name}") ChatComponent().render_chat_env(session_name=new_session_name) elif selected_session is not None: ChatComponent().render_chat_env(session_name=selected_session) session_component.render_delete_session_view() def main(): _, col2, _ = st.sidebar.columns([1, 2, 1]) with col2: st.image( "https://static.premai.io/logo.svg", use_container_width=True, width=150, clamp=True, ) st.header("PremSQL Playground") st.title("PremSQL Playground") # Add navigation selected_page = st.sidebar.selectbox("Navigation", ["Chat", "Upload csvs or use Kaggle"]) if selected_page == "Chat": st.write("Welcome to the PremSQL Playground. Select or create a session to get started.") render_main_view() else: st.write( "You can either upload multiple csv files or enter a valid Kaggle ID. " "This will migrate all the csvs into a SQLite Database. You can then " "use them for natural language powered analysis using PremSQL." ) UploadComponent.render_kaggle_view() UploadComponent.render_csv_upload_view() if __name__ == "__main__": main() ================================================ FILE: premsql/playground/frontend/utils.py ================================================ import re import os import pandas as pd import kagglehub import sqlite3 from pathlib import Path from platformdirs import user_cache_dir from premsql.logger import setup_console_logger logger = setup_console_logger("[FRONTEND-UTILS]") def _is_valid_kaggle_id(kaggle_id: str) -> bool: pattern = r'^[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+$' return bool(re.match(pattern, kaggle_id)) def download_from_kaggle(kaggle_dataset_id: str): path = kagglehub.dataset_download(handle=kaggle_dataset_id) return path def _migrate_to_sqlite(csv_folder: Path, sqlite_db_path: Path) -> Path: """Common migration logic for both Kaggle and local CSV uploads.""" conn = sqlite3.connect(sqlite_db_path) try: for csv_file in csv_folder.glob('*.csv'): table_name = csv_file.stem df = pd.read_csv(csv_file) df.to_sql(table_name, conn, if_exists='replace', 
def migrate_local_csvs_to_sqlite(
    uploaded_files: list, session_name: str
) -> Path:
    """Persist in-memory CSV uploads into a per-session cache folder and
    build a SQLite database from them.

    Args:
        uploaded_files: objects exposing `.name` and `.getvalue()`
            (Streamlit UploadedFile-like).
        session_name: used for both the CSV sub-folder and the DB filename.

    Returns:
        Path of the created SQLite database.
    """
    uploads_root = Path(user_cache_dir()) / "premsql" / "csv_uploads"
    session_csv_dir = uploads_root / session_name
    session_csv_dir.mkdir(parents=True, exist_ok=True)
    uploads_root.mkdir(parents=True, exist_ok=True)
    db_path = uploads_root / f"{session_name}.sqlite"
    # Write each upload to disk so the shared migration helper can find
    # the files via glob.
    for upload in uploaded_files:
        (session_csv_dir / upload.name).write_bytes(upload.getvalue())
    return _migrate_to_sqlite(session_csv_dir, db_path)
def is_online(self, base_url: str) -> int:
    """Probe the inference server's /health endpoint.

    Returns:
        The status code reported in the server's JSON payload (200 when
        healthy), or 500 when the server is unreachable or errored.
        Note: despite the name, this returns an int status code, not a
        bool — callers compare the result against 200. The annotation
        previously said `bool`, which was wrong.
    """
    try:
        response = self._make_request(base_url, "GET", "/health")
        return response.get("status_code")
    except Exception:
        # Any transport or parsing failure is treated as "offline".
        return 500
class CompletionResponse(BaseModel):
    # Response payload of POST /completion: the agent's answer plus the id
    # under which it was stored in the session history.
    message_id: int
    message: AgentOutput


class AgentServer:
    """Expose an AgentBase over HTTP via FastAPI.

    Routes: POST /completion, GET /chat_history/{message_id}, GET / and
    GET /health (health checks), GET /session_info. `launch()` serves the
    app with uvicorn on (url, port).
    """

    def __init__(
        self,
        agent: AgentBase,
        url: Optional[str] = "localhost",
        port: Optional[int] = 8100,
    ) -> None:
        self.agent = agent
        self.port = port
        self.url = url
        self.app = self.create_app()

    @asynccontextmanager
    async def lifespan(self, app: FastAPI):
        # Startup: Log the initialization
        logger.info("Starting up the application")
        yield
        # Shutdown: give the agent a chance to release resources.
        logger.info("Shutting down the application")
        if hasattr(self.agent, "cleanup"):
            await self.agent.cleanup()

    def create_app(self):
        """Build the FastAPI app and register all routes."""
        app = FastAPI(lifespan=self.lifespan)
        app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],  # Allows all origins
            allow_credentials=True,
            allow_methods=["*"],  # Allows all methods
            allow_headers=["*"],  # Allows all headers
        )

        @app.post("/completion", response_model=CompletionResponse)
        async def completion(input_data: QuestionInput):
            # Run the agent synchronously; return the stored message id so
            # clients can re-fetch the answer via /chat_history.
            try:
                result = self.agent(question=input_data.question, server_mode=True)
                message_id = self.agent.history.get_latest_message_id()
                return CompletionResponse(
                    message=AgentOutput(**result.model_dump()),
                    message_id=message_id
                )
            except Exception as e:
                stack_trace = traceback.format_exc()
                logger.error(stack_trace)
                logger.error(f"Error processing query: {str(e)}")
                raise HTTPException(
                    status_code=500, detail=f"Error processing query: {str(e)}"
                )

        # TODO: I need a method which will just get the "latest message_id"
        @app.get("/chat_history/{message_id}", response_model=ChatHistoryResponse)
        async def get_chat_history(message_id: int):
            try:
                exit_output = self.agent.history.get_by_message_id(
                    message_id=message_id
                )
                if exit_output is None:
                    raise HTTPException(
                        status_code=404,
                        detail=f"Message with ID {message_id} not found",
                    )
                agent_output = self.agent.convert_exit_output_to_agent_output(
                    exit_output=exit_output
                )
                return ChatHistoryResponse(
                    message_id=message_id, agent_output=agent_output
                )
            except Exception as e:
                # NOTE(review): this broad handler also converts the 404
                # HTTPException raised above into a 500 — confirm intended.
                logger.error(f"Error retrieving chat history: {str(e)}")
                raise HTTPException(
                    status_code=500, detail=f"Error retrieving chat history: {str(e)}"
                )

        @app.get("/")
        async def health_check():
            # Root health check that also reports which session is running.
            return {
                "status_code": 200,
                "status": f"healthy, running: {self.agent.session_name}"
            }

        # NOTE(review): this handler reuses the name `health_check`,
        # shadowing the one above. FastAPI registers both routes anyway
        # (registration happens at decoration time), but consider renaming.
        @app.get("/health")
        async def health_check():
            return {"status_code": 200, "status": "healthy"}

        @app.get("/session_info", response_model=SessionInfoResponse)
        async def get_session_info():
            try:
                session_name = getattr(self.agent, "session_name", None)
                db_connection_uri = getattr(self.agent, "db_connection_uri", None)
                session_db_path = getattr(self.agent, "session_db_path", None)
                # All three attributes must be set on the agent for the
                # session info to be meaningful.
                if any(
                    attr is None
                    for attr in [session_name, db_connection_uri, session_db_path]
                ):
                    raise ValueError("One or more required attributes are None")
                return SessionInfoResponse(
                    status=200,
                    session_name=session_name,
                    db_connection_uri=db_connection_uri,
                    session_db_path=session_db_path,
                    base_url=f"{self.url}:{self.port}",
                    created_at=datetime.now(),
                )
            except Exception as e:
                logger.error(f"Error getting session info: {str(e)}")
                return SessionInfoResponse(
                    status=500,
                    session_name=None,
                    db_connection_uri=None,
                    session_db_path=None,
                    base_url=None,
                    created_at=None,
                )

        return app

    def launch(self):
        """Start serving with uvicorn (blocking)."""
        import uvicorn
        logger.info(f"Starting server on port {self.port}")
        uvicorn.run(self.app, host=self.url, port=int(self.port))
Think step by step and always check schema and question and the column names before writing the query. # Database and Table Schema: {schemas} {additional_knowledge} # Here are some Examples on how to generate SQL statements and use column names: {few_shot_examples} # Question: {question} # SQL: """ OLD_BASE_TEXT2SQL_PROMPT = """ # Instruction: - You will be given a question and a database schema. - You need to write a SQL query to answer the question. Do not add ``` at start / end of the query. It should be a single line query in a single line (string format). - Make sure the column names are correct and exists in the table - For column names which has a space with it, make sure you have put `` in that column name # Database and Table Schema: {schemas} {additional_knowledge} # Here are some Examples on how to generate SQL statements and use column names: {few_shot_examples} # Question: {question} # SQL: """ ERROR_HANDLING_PROMPT = """ {existing_prompt} # Generated SQL: {sql} ## Error Message {error_msg} Carefully review the original question and error message, then rewrite the SQL query to address the identified issues. Ensure your corrected query uses correct column names, follows proper SQL syntax, and accurately answers the original question without introducing new errors. 
"""Public re-exports for the premsql.tuner package."""
from premsql.tuner.callback import Text2SQLEvaluationCallback
from premsql.tuner.config import (
    DefaultLoraConfig,
    DefaultPeftArguments,
    DefaultTrainingArguments,
)
from premsql.tuner.full import Text2SQLFullFinetuner
from premsql.tuner.peft import Text2SQLPeftTuner

__all__ = [
    "Text2SQLFullFinetuner",
    "Text2SQLPeftTuner",
    "DefaultLoraConfig",
    "DefaultPeftArguments",
    # Fix: DefaultTrainingArguments was imported above but missing from
    # __all__, so `from premsql.tuner import *` did not expose it.
    "DefaultTrainingArguments",
    "Text2SQLEvaluationCallback",
]
class Text2SQLEvaluationCallback(TrainerCallback):
    """Trainer callback that periodically evaluates the in-training model
    on a Text2SQL dataset (execution accuracy) and logs the scores to
    TensorBoard and the trainer's log history."""

    def __init__(
        self,
        trainer: Trainer,
        trainer_args: TrainingArguments,
        eval_dataset: Text2SQLBaseDataset,
        executor: BaseExecutor,
        experiment_name: str,
        model_or_name_or_id: str,
        eval_steps: int,
        hf_token: Optional[str] = None,
        filter_results_by: Optional[tuple] = None,
    ):
        # filter_results_by, when given, is a pair used at evaluation time:
        # element [0] filters the accuracy report, element [1] selects which
        # slice of the score dict is written into the log history.
        self.trainer = trainer
        self.eval_steps = eval_steps
        self.experiment_name = experiment_name
        log_dir = trainer_args.logging_dir
        os.makedirs(log_dir, exist_ok=True)
        self.tb_writer = SummaryWriter(log_dir=log_dir)
        logger.info(f"TensorBoard log directory: {log_dir}")
        # NOTE(review): model_or_name_or_id and hf_token are stored but not
        # read anywhere else in this class — confirm they are still needed.
        self.model_or_name_or_id = model_or_name_or_id
        self.hf_token = hf_token
        self.dataset = eval_dataset
        self.executor = executor
        self.filter_by = filter_results_by

    def on_step_end(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ):
        """Every `eval_steps` steps (rank 0 only): generate SQL for the
        eval dataset, compute execution accuracy, and log the scores."""
        if args.local_rank == 0 and state.global_step % self.eval_steps == 0:
            logger.info(f"Evaluating at step {state.global_step}")
            # Wrap the live training model so the generator API can run it.
            model = Text2SQLGeneratorHF(
                model_or_name_or_path=self.trainer.model,
                experiment_name=f"{self.experiment_name}_step_{state.global_step}",
                type="test",
                device="cuda:0",
            )
            responses = model.generate_and_save_results(
                dataset=self.dataset, temperature=0.1, max_new_tokens=256, force=True
            )
            evaluator = Text2SQLEvaluator(
                executor=self.executor, experiment_path=model.experiment_path
            )
            if self.filter_by:
                ex_score = evaluator.execute(
                    metric_name="accuracy",
                    model_responses=responses,
                    filter_by=self.filter_by[0],
                )
            else:
                ex_score = evaluator.execute(
                    metric_name="accuracy",
                    model_responses=responses,
                )
            logger.info(f"Execution Accuracy at step {state.global_step} | {ex_score}")
            # Log into tensorboard: one scalar per difficulty bucket.
            logger.info(f"Logging to TensorBoard: {ex_score}")
            for difficulty, score in ex_score.items():
                logger.info(f"Logging {difficulty}: {score}")
                self.tb_writer.add_scalar(
                    f"execution_accuracy/{difficulty}", score, state.global_step
                )
            self.tb_writer.flush()  # Force writing to disk
            # Also append the selected slice to the trainer's log history.
            state.log_history.append(
                {
                    "step": state.global_step,
                    "execution_accuracy": (
                        ex_score.get(self.filter_by[1])
                        if self.filter_by
                        else ex_score.get("overall")
                    ),
                    "selected_difficulty": (
                        self.filter_by[0] if self.filter_by else "overall"
                    ),
                }
            )
        return control

    def on_train_end(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ):
        """Close the TensorBoard writer when training finishes."""
        self.tb_writer.close()
        logger.info("TensorBoard writer closed")
}, ) max_seq_length: int = field(default=1024) ddp_find_unused_parameters: Optional[bool] = field(default=False) fp16: bool = field(default=False) bf16: bool = field(default=True) weight_decay: float = field(default=0.1) lr_scheduler_type: str = field(default="cosine") warmup_ratio: float = field(default=0.01) logging_steps: int = field(default=10) save_strategy: str = field(default="steps") save_steps: int = field(default=200) save_total_limit: int = field(default=3) auto_find_batch_size: Optional[bool] = field(default=False) report_to: List[str] = field(default_factory=lambda: ["tensorboard"]) @dataclass class DefaultPeftArguments(TrainingArguments): output_dir: str num_train_epochs: int per_device_train_batch_size: int gradient_accumulation_steps: int load_best_model_at_end: Optional[bool] = field(default=False) gradient_checkpointing: Optional[bool] = field(default=True) evaluation_strategy: Optional[str] = field(default="no") optim: str = field(default="adamw_hf") max_grad_norm: Optional[bool] = field(default=0.3) weight_decay: float = field(default=0.1) lr_scheduler_type: str = field(default="cosine") warmup_ratio: float = field(default=0.01) logging_steps: int = field(default=10) save_strategy: str = field(default="steps") save_steps: int = field(default=200) save_total_limit: int = field(default=3) auto_find_batch_size: Optional[bool] = field(default=False) report_to: List[str] = field(default_factory=lambda: ["tensorboard"]) fp16: Optional[bool] = field(default=False) bf16: Optional[bool] = field(default=True) neftune_noise_alpha: Optional[int] = field(default=5) @dataclass class DefaultLoraConfig(LoraConfig): lora_alpha: float = field(default=32) lora_dropout: float = field(default=0.1) r: int = field(default=64) target_modules: List[str] = field( default_factory=lambda: [ "q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "lm_head", ] ) task_type: TaskType = field(default=TaskType.CAUSAL_LM) 
================================================ FILE: premsql/tuner/full.py ================================================ from typing import Optional, Sequence import transformers from premsql.datasets.base import Text2SQLBaseDataset from premsql.datasets.collator import DataCollatorForSupervisedDataset from premsql.evaluator.base import BaseExecutor from premsql.logger import setup_console_logger from premsql.tuner.callback import Text2SQLEvaluationCallback from premsql.tuner.config import DefaultTrainingArguments logger = setup_console_logger("[FULL-FINETUNE]") class Text2SQLFullFinetuner: def __init__( self, model_name_or_path: str, experiment_name: str, hf_token: Optional[str] = None, **model_kwargs, ): self.model_name_or_path = model_name_or_path logger.warning("Setting up Pretrained-Model: " + str(model_name_or_path)) self.model = transformers.AutoModelForCausalLM.from_pretrained( model_name_or_path, token=hf_token, **model_kwargs ) self.tokenizer = transformers.AutoTokenizer.from_pretrained( model_name_or_path, padding_size="right", token=hf_token ) self.data_collator = DataCollatorForSupervisedDataset(tokenizer=self.tokenizer) self._hf_token = hf_token self.experiment_name = experiment_name def train( self, train_datasets: Sequence[Text2SQLBaseDataset], output_dir: str, num_train_epochs: int, per_device_train_batch_size: int, gradient_accumulation_steps: int, evaluation_dataset: Optional[Text2SQLBaseDataset] = None, eval_steps: Optional[int] = 500, executor: Optional[BaseExecutor] = None, filter_eval_results_by: Optional[tuple] = None, **training_arguments, ): self.training_arguments = DefaultTrainingArguments( output_dir=output_dir, num_train_epochs=num_train_epochs, per_device_train_batch_size=per_device_train_batch_size, gradient_accumulation_steps=gradient_accumulation_steps, **training_arguments, ) data_module = dict( train_dataset=train_datasets, eval_dataset=None, data_collator=self.data_collator, ) trainer = transformers.Trainer( 
model=self.model, tokenizer=self.tokenizer, args=self.training_arguments, **data_module, ) if evaluation_dataset is not None and executor is not None: eval_callback = Text2SQLEvaluationCallback( trainer=trainer, trainer_args=self.training_arguments, eval_dataset=evaluation_dataset, experiment_name=self.experiment_name, model_or_name_or_id=self.model_name_or_path, eval_steps=eval_steps, executor=executor, filter_results_by=filter_eval_results_by, hf_token=self._hf_token, ) trainer.add_callback(eval_callback) trainer.train() trainer.save_model(output_dir=self.training_arguments.output_dir) ================================================ FILE: premsql/tuner/peft.py ================================================ from typing import Optional, Sequence from premsql.datasets.base import Text2SQLBaseDataset from premsql.datasets.collator import DataCollatorForSupervisedDataset from premsql.evaluator.base import BaseExecutor from premsql.logger import setup_console_logger from premsql.tuner.callback import Text2SQLEvaluationCallback from premsql.tuner.config import DefaultLoraConfig, DefaultPeftArguments logger = setup_console_logger("[LORA-FINETUNE]") try: import torch import transformers from peft import LoraConfig from transformers import BitsAndBytesConfig from trl import SFTTrainer except ImportError: logger.warn("Ensure torch transformers peft and trl are installed.") logger.warn("Install them by: pip install torch peft trl transformers") class Text2SQLPeftTuner: def __init__( self, model_name_or_path: str, experiment_name: str, peft_config: Optional[LoraConfig] = None, bnb_config: Optional[BitsAndBytesConfig] = None, hf_token: Optional[str] = None, **model_kwargs, ): self.peft_config = peft_config or DefaultLoraConfig() self.bnb_config = bnb_config self.model_name_or_path = model_name_or_path logger.warning("Setting up Pretrained-Model: " + str(model_name_or_path)) self.model = transformers.AutoModelForCausalLM.from_pretrained( model_name_or_path, token=hf_token, 
torch_dtype=torch.bfloat16, quantization_config=bnb_config, **model_kwargs, ) self.tokenizer = transformers.AutoTokenizer.from_pretrained( model_name_or_path, padding_size="right", token=hf_token ) self.data_collator = DataCollatorForSupervisedDataset(tokenizer=self.tokenizer) self._hf_token = hf_token self.experiment_name = experiment_name def train( self, train_datasets: Sequence[Text2SQLBaseDataset], output_dir: str, num_train_epochs: int, max_seq_length: int, per_device_train_batch_size: int, gradient_accumulation_steps: int, evaluation_dataset: Optional[Text2SQLBaseDataset] = None, eval_steps: Optional[int] = 500, executor: Optional[BaseExecutor] = None, filter_eval_results_by: Optional[tuple] = None, **training_arguments, ): self.training_arguments = transformers.TrainingArguments( **DefaultPeftArguments( output_dir=output_dir, num_train_epochs=num_train_epochs, per_device_train_batch_size=per_device_train_batch_size, gradient_accumulation_steps=gradient_accumulation_steps, **training_arguments, ).to_dict() ) if "raw" in train_datasets[0]: formatting_func = lambda x: x["raw"]["prompt"] else: formatting_func = lambda x: x["prompt"] trainer = SFTTrainer( model=self.model, train_dataset=train_datasets, peft_config=self.peft_config, tokenizer=self.tokenizer, args=self.training_arguments, packing=True, formatting_func=formatting_func, max_seq_length=max_seq_length, ) if evaluation_dataset is not None and executor is not None: eval_callback = Text2SQLEvaluationCallback( trainer=trainer, trainer_args=self.training_arguments, eval_dataset=evaluation_dataset, experiment_name=self.experiment_name, model_or_name_or_id=self.model_name_or_path, eval_steps=eval_steps, executor=executor, filter_results_by=filter_eval_results_by, hf_token=self._hf_token, ) trainer.add_callback(eval_callback) trainer.train() trainer.save_model(output_dir=self.training_arguments.output_dir) ================================================ FILE: premsql/utils.py 
================================================ import json import os import random import re import sqlite3 from collections import defaultdict from pathlib import Path from textwrap import dedent from typing import Optional, Sequence, Union from tqdm.auto import tqdm from premsql.logger import setup_console_logger logger = setup_console_logger(name="[UTILS]") try: from transformers import PreTrainedTokenizer except ImportError: logger.warn("Unable to use transformers. Install using: pip install transformers") def convert_sqlite_path_to_dsn(path: str): sqlite3_pattern = r"^sqlite:\/\/\/.*" if re.match(sqlite3_pattern, path): return path return f"sqlite:///{os.path.abspath(path)}" def convert_sqlite_dsn_to_path(dsn: str) -> str: sqlite3_pattern = r"^sqlite:\/\/\/(.*)" match = re.match(sqlite3_pattern, dsn) if match: return os.path.abspath(match.group(1)) return dsn def print_data(data: dict): if "prompt" in data: prompt = data["prompt"] data["prompt"] = prompt[:100] + "...." + prompt[-100:] elif "prompt" in data["raw"]: prompt = data["raw"]["prompt"] data["raw"]["prompt"] = prompt[:100] + "...." 
+ prompt[-100:] else: raise ValueError("Prompt key not found in data") return data def save_to_json(save_path: Union[str, Path], json_object: dict): try: save_path = Path(save_path) if isinstance(save_path, str) else save_path with open(save_path, "w") as json_file: json.dump(json_object, json_file, indent=4, ensure_ascii=False) logger.info(f"Saved JSON in: {save_path}") except Exception as e: logger.error(f"Unable to save JSON, Error: {e}") def load_from_json(result_json_path: str) -> dict: try: with open(result_json_path, "r") as json_file: return json.load(json_file) except Exception as e: logger.error(f"Unable to load JSON, Error: {e}") def sqlite_schema_prompt(db_path: str) -> str: schemas = {} conn = sqlite3.connect(db_path) cursor = conn.cursor() cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") tables = cursor.fetchall() for table in tables: table_name = table[0] if table_name == "sqlite_sequence": continue cursor.execute( f"SELECT sql FROM sqlite_master WHERE type='table' AND name='{table_name}';" ) create_table_sql = cursor.fetchone() if create_table_sql: schemas[table_name] = create_table_sql[0] else: schemas[table_name] = "Schema does not exist" schema_prompt = "\n".join( schemas[table[0]] for table in tables if table[0] != "sqlite_sequence" ) return schema_prompt def get_random_few_shot_prompts(dataset: list[dict], num_few_shot: int): assert "db_id" in dataset[0], ValueError( "db_id key should be present to use this function" ) grouped_content = defaultdict(list) few_shot_prompts = {} template = dedent( """ Question: {question} SQL: {sql} """ ) for content in dataset: grouped_content[content["db_id"]].append(content) for db_id, contents in grouped_content.items(): num_few_shot = min(num_few_shot, len(contents)) random_sample = random.sample(contents, num_few_shot) few_shot_prompt = "".join( template.format(question=element["question"], sql=element["SQL"]) for element in random_sample ) few_shot_prompts[db_id] = few_shot_prompt return 
few_shot_prompts def get_accepted_filters(data: list[dict]) -> Sequence[str]: key_num_mapping = {} for key in data[0].keys(): key_num_mapping[key] = len(set([content[key] for content in data])) accepted_keys = [] for key, num in key_num_mapping.items(): if num < len(data) * 0.5 and key != "db_path": accepted_keys.append(key) return accepted_keys def filter_options( data: list[dict], filter_by: tuple, accepted_keys: Optional[Sequence[str]] = None ): filter_key, filter_value = filter_by accepted_keys = ( get_accepted_filters(data=data) if accepted_keys is None else accepted_keys ) assert filter_key in accepted_keys, ValueError( f"Filtering is supported for keys: `{''.join(accepted_keys)}`" ) for key in accepted_keys: if filter_key == key: accepted_values = set([content[key] for content in data]) assert filter_value in accepted_values, ValueError( f"Available values for key: {key} are: {', '.join(accepted_values)}" ) filtered_data = [content for content in data if content[filter_key] == filter_value] return filtered_data def tokenize_fn(strings: Sequence[str], tokenizer: "PreTrainedTokenizer") -> dict: """Tokenizes a list of string""" tokenized_list = [ tokenizer( text=text, return_tensors="pt", padding="longest", max_length=tokenizer.model_max_length, truncation=False, ) for text in tqdm(strings, total=len(strings), desc="Tokenizing") ] input_ids = labels = [tokenized.input_ids[0] for tokenized in tokenized_list] input_ids_lens = label_ids_lens = [ tokenized.input_ids.ne(tokenizer.pad_token_id).sum().item() for tokenized in tokenized_list ] return dict( input_ids=input_ids, labels=labels, input_ids_lens=input_ids_lens, label_ids_lens=label_ids_lens, ) ================================================ FILE: pyproject.toml ================================================ [tool.poetry] name = "premsql" version = "0.2.10" description = "" authors = ["Anindyadeep "] readme = "README.md" [tool.poetry.dependencies] python = "^3.10" datasets = "^2.20.0" einops = "^0.8.0" 
black = "^24.4.2" fastapi = "^0.112.0" huggingface-hub = "^0.24.5" isort = "^5.13.2" numpy = "^1.26.3" tqdm = "^4.66.4" mysql-connector-python = "^9.0.0" SQLAlchemy = "^2.0.30" sqlparse = "^0.5.1" click = "^8.1.3" langchain-community = "^0.3.3" openai = "^1.52.0" premai = "^0.3.73" django = "^5.1.2" djangorestframework = "^3.15.2" drf-yasg = "^1.21.8" func_timeout = "^4.3.5" matplotlib = "^3.9.2" pillow = ">=8,<11" uvicorn = "^0.32.0" streamlit = "^1.40.0" kagglehub = "^0.3.3" [tool.poetry.extras] mac = ["mlx", "mlx-lm"] [tool.poetry.group.mac] optional = true [tool.poetry.group.mac.dependencies] mlx = "^0.19.1" mlx-lm = "^0.19.2" [tool.poetry.group.linux.dependencies] transformers = "^4.43.3" torch = "^2.4.0" [tool.poetry.group.windows.dependencies] transformers = "^4.43.3" torch = "^2.4.0" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.poetry.scripts] premsql = "premsql.cli:cli"