Repository: neuml/txtai Branch: master Commit: 7e3c0e16a450 Files: 562 Total size: 30.8 MB Directory structure: gitextract_nm3aplsu/ ├── .coveragerc ├── .github/ │ └── workflows/ │ ├── build.yml │ ├── docs.yml │ └── minimal.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── CITATION.cff ├── LICENSE ├── Makefile ├── README.md ├── docker/ │ ├── api/ │ │ └── Dockerfile │ ├── aws/ │ │ ├── Dockerfile │ │ ├── api.py │ │ └── workflow.py │ ├── base/ │ │ └── Dockerfile │ ├── schedule/ │ │ └── Dockerfile │ └── workflow/ │ └── Dockerfile ├── docs/ │ ├── agent/ │ │ ├── configuration.md │ │ ├── index.md │ │ └── methods.md │ ├── api/ │ │ ├── cluster.md │ │ ├── configuration.md │ │ ├── customization.md │ │ ├── index.md │ │ ├── mcp.md │ │ ├── methods.md │ │ ├── openai.md │ │ └── security.md │ ├── cloud.md │ ├── embeddings/ │ │ ├── configuration/ │ │ │ ├── ann.md │ │ │ ├── cloud.md │ │ │ ├── database.md │ │ │ ├── general.md │ │ │ ├── graph.md │ │ │ ├── index.md │ │ │ ├── scoring.md │ │ │ └── vectors.md │ │ ├── format.md │ │ ├── index.md │ │ ├── indexing.md │ │ ├── methods.md │ │ └── query.md │ ├── examples.md │ ├── faq.md │ ├── further.md │ ├── images/ │ │ ├── agent.excalidraw │ │ ├── api.excalidraw │ │ ├── architecture.excalidraw │ │ ├── cloud.excalidraw │ │ ├── embeddings.excalidraw │ │ ├── examples.excalidraw │ │ ├── faq.excalidraw │ │ ├── flows.excalidraw │ │ ├── format.excalidraw │ │ ├── further.excalidraw │ │ ├── indexing.excalidraw │ │ ├── install.excalidraw │ │ ├── llm.excalidraw │ │ ├── models.excalidraw │ │ ├── pipeline.excalidraw │ │ ├── query.excalidraw │ │ ├── rag.excalidraw │ │ ├── schedule.excalidraw │ │ ├── search.excalidraw │ │ ├── task.excalidraw │ │ ├── why.excalidraw │ │ └── workflow.excalidraw │ ├── index.md │ ├── install.md │ ├── models.md │ ├── observability.md │ ├── overrides/ │ │ └── main.html │ ├── pipeline/ │ │ ├── audio/ │ │ │ ├── audiomixer.md │ │ │ ├── audiostream.md │ │ │ ├── microphone.md │ │ │ ├── texttoaudio.md │ │ │ ├── 
texttospeech.md │ │ │ └── transcription.md │ │ ├── data/ │ │ │ ├── filetohtml.md │ │ │ ├── htmltomd.md │ │ │ ├── segmentation.md │ │ │ ├── tabular.md │ │ │ ├── textractor.md │ │ │ └── tokenizer.md │ │ ├── image/ │ │ │ ├── caption.md │ │ │ ├── imagehash.md │ │ │ └── objects.md │ │ ├── index.md │ │ ├── llm/ │ │ │ ├── llm.md │ │ │ └── rag.md │ │ ├── text/ │ │ │ ├── entity.md │ │ │ ├── labels.md │ │ │ ├── reranker.md │ │ │ ├── similarity.md │ │ │ ├── summary.md │ │ │ └── translation.md │ │ └── train/ │ │ ├── hfonnx.md │ │ ├── mlonnx.md │ │ └── trainer.md │ ├── poweredby.md │ ├── usecases.md │ ├── why.md │ └── workflow/ │ ├── index.md │ ├── schedule.md │ └── task/ │ ├── console.md │ ├── export.md │ ├── file.md │ ├── image.md │ ├── index.md │ ├── retrieve.md │ ├── service.md │ ├── storage.md │ ├── template.md │ ├── url.md │ └── workflow.md ├── examples/ │ ├── 01_Introducing_txtai.ipynb │ ├── 02_Build_an_Embeddings_index_with_Hugging_Face_Datasets.ipynb │ ├── 03_Build_an_Embeddings_index_from_a_data_source.ipynb │ ├── 04_Add_semantic_search_to_Elasticsearch.ipynb │ ├── 05_Extractive_QA_with_txtai.ipynb │ ├── 06_Extractive_QA_with_Elasticsearch.ipynb │ ├── 07_Apply_labels_with_zero_shot_classification.ipynb │ ├── 08_API_Gallery.ipynb │ ├── 09_Building_abstractive_text_summaries.ipynb │ ├── 10_Extract_text_from_documents.ipynb │ ├── 11_Transcribe_audio_to_text.ipynb │ ├── 12_Translate_text_between_languages.ipynb │ ├── 13_Similarity_search_with_images.ipynb │ ├── 14_Run_pipeline_workflows.ipynb │ ├── 15_Distributed_embeddings_cluster.ipynb │ ├── 16_Train_a_text_labeler.ipynb │ ├── 17_Train_without_labels.ipynb │ ├── 18_Export_and_run_models_with_ONNX.ipynb │ ├── 19_Train_a_QA_model.ipynb │ ├── 20_Extractive_QA_to_build_structured_data.ipynb │ ├── 21_Export_and_run_other_machine_learning_models.ipynb │ ├── 22_Transform_tabular_data_with_composable_workflows.ipynb │ ├── 23_Tensor_workflows.ipynb │ ├── 24_Whats_new_in_txtai_4_0.ipynb │ ├── 
25_Generate_image_captions_and_detect_objects.ipynb │ ├── 26_Entity_extraction_workflows.ipynb │ ├── 27_Workflow_scheduling.ipynb │ ├── 28_Push_notifications_with_workflows.ipynb │ ├── 29_Anatomy_of_a_txtai_index.ipynb │ ├── 30_Embeddings_SQL_custom_functions.ipynb │ ├── 31_Near_duplicate_image_detection.ipynb │ ├── 32_Model_explainability.ipynb │ ├── 33_Query_translation.ipynb │ ├── 34_Build_a_QA_database.ipynb │ ├── 35_Pictures_are_worth_a_thousand_words.ipynb │ ├── 36_Run_txtai_in_native_code.ipynb │ ├── 37_Embeddings_index_components.ipynb │ ├── 38_Introducing_the_Semantic_Graph.ipynb │ ├── 39_Classic_Topic_Modeling_with_BM25.ipynb │ ├── 40_Text_to_Speech_Generation.ipynb │ ├── 41_Train_a_language_model_from_scratch.ipynb │ ├── 42_Prompt_driven_search_with_LLMs.ipynb │ ├── 43_Embeddings_in_the_Cloud.ipynb │ ├── 44_Prompt_templates_and_task_chains.ipynb │ ├── 45_Customize_your_own_embeddings_database.ipynb │ ├── 46_Whats_new_in_txtai_6_0.ipynb │ ├── 47_Building_an_efficient_sparse_keyword_index_in_Python.ipynb │ ├── 48_Benefits_of_hybrid_search.ipynb │ ├── 49_External_database_integration.ipynb │ ├── 50_All_about_vector_quantization.ipynb │ ├── 51_Custom_API_Endpoints.ipynb │ ├── 52_Build_RAG_pipelines_with_txtai.ipynb │ ├── 53_Integrate_LLM_Frameworks.ipynb │ ├── 54_API_Authorization_and_Authentication.ipynb │ ├── 55_Generate_knowledge_with_Semantic_Graphs_and_RAG.ipynb │ ├── 56_External_vectorization.ipynb │ ├── 57_Build_knowledge_graphs_with_LLM_driven_entity_extraction.ipynb │ ├── 58_Advanced_RAG_with_graph_path_traversal.ipynb │ ├── 59_Whats_new_in_txtai_7_0.ipynb │ ├── 60_Advanced_RAG_with_guided_generation.ipynb │ ├── 61_Integrate_txtai_with_Postgres.ipynb │ ├── 62_RAG_with_llama_cpp_and_external_API_services.ipynb │ ├── 63_How_RAG_with_txtai_works.ipynb │ ├── 64_Embeddings_index_format_for_open_data_access.ipynb │ ├── 65_Speech_to_Speech_RAG.ipynb │ ├── 66_Generative_Audio.ipynb │ ├── 67_Whats_new_in_txtai_8_0.ipynb │ ├── 
68_Analyzing_Hugging_Face_Posts_with_Graphs_and_Agents.ipynb │ ├── 69_Granting_autonomy_to_agents.ipynb │ ├── 70_Getting_started_with_LLM_APIs.ipynb │ ├── 71_Analyzing_LinkedIn_Company_Posts_with_Graphs_and_Agents.ipynb │ ├── 72_Parsing_the_stars_with_txtai.ipynb │ ├── 73_Chunking_your_data_for_RAG.ipynb │ ├── 74_OpenAI_Compatible_API.ipynb │ ├── 75_Medical_RAG_Research_with_txtai.ipynb │ ├── 76_Whats_new_in_txtai_9_0.ipynb │ ├── 77_GraphRAG_with_Wikipedia_and_GPT_OSS.ipynb │ ├── 78_Accessing_Low_Level_Vector_APIs.ipynb │ ├── 79_RAG_is_more_than_Vector_Search.ipynb │ ├── 80_Distilling_Knowledge_into_Tiny_LLMs.ipynb │ ├── 81_OpenCode_as_a_txtai_LLM.ipynb │ ├── 82_Agentic_College_Search.ipynb │ ├── 83_TxtAI_got_skills.ipynb │ ├── 84_Agent_Tools.ipynb │ ├── agent_quickstart.py │ ├── article.py │ ├── baseball.py │ ├── benchmarks.py │ ├── books.py │ ├── images.py │ ├── rag_quickstart.py │ ├── similarity.py │ ├── wiki.py │ ├── workflow_quickstart.py │ └── workflows.py ├── mkdocs.yml ├── pyproject.toml ├── setup.py ├── src/ │ └── python/ │ └── txtai/ │ ├── __init__.py │ ├── agent/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── factory.py │ │ ├── model.py │ │ ├── placeholder.py │ │ └── tool/ │ │ ├── __init__.py │ │ ├── bash.py │ │ ├── edit.py │ │ ├── embeddings.py │ │ ├── factory.py │ │ ├── function.py │ │ ├── glob.py │ │ ├── grep.py │ │ ├── read.py │ │ ├── skill.py │ │ ├── todo.py │ │ └── write.py │ ├── ann/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── dense/ │ │ │ ├── __init__.py │ │ │ ├── annoy.py │ │ │ ├── factory.py │ │ │ ├── faiss.py │ │ │ ├── ggml.py │ │ │ ├── hnsw.py │ │ │ ├── numpy.py │ │ │ ├── pgvector.py │ │ │ ├── sqlite.py │ │ │ └── torch.py │ │ └── sparse/ │ │ ├── __init__.py │ │ ├── factory.py │ │ ├── ivfsparse.py │ │ └── pgsparse.py │ ├── api/ │ │ ├── __init__.py │ │ ├── application.py │ │ ├── authorization.py │ │ ├── base.py │ │ ├── cluster.py │ │ ├── extension.py │ │ ├── factory.py │ │ ├── responses/ │ │ │ ├── __init__.py │ │ │ ├── factory.py │ │ │ ├── json.py 
│ │ │ └── messagepack.py │ │ ├── route.py │ │ └── routers/ │ │ ├── __init__.py │ │ ├── agent.py │ │ ├── caption.py │ │ ├── embeddings.py │ │ ├── entity.py │ │ ├── extractor.py │ │ ├── labels.py │ │ ├── llm.py │ │ ├── objects.py │ │ ├── openai.py │ │ ├── rag.py │ │ ├── reranker.py │ │ ├── segmentation.py │ │ ├── similarity.py │ │ ├── summary.py │ │ ├── tabular.py │ │ ├── textractor.py │ │ ├── texttospeech.py │ │ ├── transcription.py │ │ ├── translation.py │ │ ├── upload.py │ │ └── workflow.py │ ├── app/ │ │ ├── __init__.py │ │ └── base.py │ ├── archive/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── compress.py │ │ ├── factory.py │ │ ├── tar.py │ │ └── zip.py │ ├── cloud/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── factory.py │ │ ├── hub.py │ │ └── storage.py │ ├── console/ │ │ ├── __init__.py │ │ ├── __main__.py │ │ └── base.py │ ├── data/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── labels.py │ │ ├── questions.py │ │ ├── sequences.py │ │ ├── texts.py │ │ └── tokens.py │ ├── database/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── client.py │ │ ├── duckdb.py │ │ ├── embedded.py │ │ ├── encoder/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── factory.py │ │ │ ├── image.py │ │ │ └── serialize.py │ │ ├── factory.py │ │ ├── rdbms.py │ │ ├── schema/ │ │ │ ├── __init__.py │ │ │ ├── orm.py │ │ │ └── statement.py │ │ ├── sql/ │ │ │ ├── __init__.py │ │ │ ├── aggregate.py │ │ │ ├── base.py │ │ │ ├── expression.py │ │ │ └── token.py │ │ └── sqlite.py │ ├── embeddings/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── index/ │ │ │ ├── __init__.py │ │ │ ├── action.py │ │ │ ├── autoid.py │ │ │ ├── configuration.py │ │ │ ├── documents.py │ │ │ ├── functions.py │ │ │ ├── indexes.py │ │ │ ├── indexids.py │ │ │ ├── reducer.py │ │ │ ├── stream.py │ │ │ └── transform.py │ │ └── search/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── errors.py │ │ ├── explain.py │ │ ├── hybrid.py │ │ ├── ids.py │ │ ├── query.py │ │ ├── scan.py │ │ └── terms.py │ ├── graph/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── 
factory.py │ │ ├── networkx.py │ │ ├── query.py │ │ ├── rdbms.py │ │ └── topics.py │ ├── models/ │ │ ├── __init__.py │ │ ├── models.py │ │ ├── onnx.py │ │ ├── pooling/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── cls.py │ │ │ ├── factory.py │ │ │ ├── late.py │ │ │ ├── mean.py │ │ │ └── muvera.py │ │ ├── registry.py │ │ └── tokendetection.py │ ├── pipeline/ │ │ ├── __init__.py │ │ ├── audio/ │ │ │ ├── __init__.py │ │ │ ├── audiomixer.py │ │ │ ├── audiostream.py │ │ │ ├── microphone.py │ │ │ ├── signal.py │ │ │ ├── texttoaudio.py │ │ │ ├── texttospeech.py │ │ │ └── transcription.py │ │ ├── base.py │ │ ├── data/ │ │ │ ├── __init__.py │ │ │ ├── filetohtml.py │ │ │ ├── htmltomd.py │ │ │ ├── segmentation.py │ │ │ ├── tabular.py │ │ │ ├── textractor.py │ │ │ └── tokenizer.py │ │ ├── factory.py │ │ ├── hfmodel.py │ │ ├── hfpipeline.py │ │ ├── image/ │ │ │ ├── __init__.py │ │ │ ├── caption.py │ │ │ ├── imagehash.py │ │ │ └── objects.py │ │ ├── llm/ │ │ │ ├── __init__.py │ │ │ ├── factory.py │ │ │ ├── generation.py │ │ │ ├── huggingface.py │ │ │ ├── litellm.py │ │ │ ├── llama.py │ │ │ ├── llm.py │ │ │ ├── opencode.py │ │ │ └── rag.py │ │ ├── nop.py │ │ ├── tensors.py │ │ ├── text/ │ │ │ ├── __init__.py │ │ │ ├── crossencoder.py │ │ │ ├── entity.py │ │ │ ├── labels.py │ │ │ ├── lateencoder.py │ │ │ ├── questions.py │ │ │ ├── reranker.py │ │ │ ├── similarity.py │ │ │ ├── summary.py │ │ │ └── translation.py │ │ └── train/ │ │ ├── __init__.py │ │ ├── hfonnx.py │ │ ├── hftrainer.py │ │ └── mlonnx.py │ ├── scoring/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bm25.py │ │ ├── factory.py │ │ ├── normalize.py │ │ ├── pgtext.py │ │ ├── sif.py │ │ ├── sparse.py │ │ ├── terms.py │ │ └── tfidf.py │ ├── serialize/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── errors.py │ │ ├── factory.py │ │ ├── messagepack.py │ │ ├── pickle.py │ │ └── serializer.py │ ├── util/ │ │ ├── __init__.py │ │ ├── resolver.py │ │ ├── sparsearray.py │ │ └── template.py │ ├── vectors/ │ │ ├── __init__.py │ │ ├── 
base.py │ │ ├── dense/ │ │ │ ├── __init__.py │ │ │ ├── external.py │ │ │ ├── factory.py │ │ │ ├── huggingface.py │ │ │ ├── litellm.py │ │ │ ├── llama.py │ │ │ ├── m2v.py │ │ │ ├── sbert.py │ │ │ └── words.py │ │ ├── recovery.py │ │ └── sparse/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── factory.py │ │ └── sbert.py │ ├── version.py │ └── workflow/ │ ├── __init__.py │ ├── base.py │ ├── execute.py │ ├── factory.py │ └── task/ │ ├── __init__.py │ ├── base.py │ ├── console.py │ ├── export.py │ ├── factory.py │ ├── file.py │ ├── image.py │ ├── retrieve.py │ ├── service.py │ ├── storage.py │ ├── stream.py │ ├── template.py │ ├── url.py │ └── workflow.py └── test/ └── python/ ├── testagent.py ├── testann/ │ ├── __init__.py │ ├── testdense.py │ └── testsparse.py ├── testapi/ │ ├── __init__.py │ ├── testapiagent.py │ ├── testapiembeddings.py │ ├── testapipipeline.py │ ├── testapiworkflow.py │ ├── testauthorization.py │ ├── testcluster.py │ ├── testencoding.py │ ├── testextension.py │ ├── testmcp.py │ └── testopenai.py ├── testapp.py ├── testarchive.py ├── testcloud.py ├── testconsole.py ├── testdatabase/ │ ├── __init__.py │ ├── testclient.py │ ├── testcustom.py │ ├── testdatabase.py │ ├── testduckdb.py │ ├── testencoder.py │ ├── testrdbms.py │ ├── testsql.py │ └── testsqlite.py ├── testembeddings.py ├── testgraph.py ├── testmodels/ │ ├── __init__.py │ ├── testmodels.py │ └── testpooling.py ├── testoptional.py ├── testpipeline/ │ ├── __init__.py │ ├── testaudio/ │ │ ├── __init__.py │ │ ├── testaudiomixer.py │ │ ├── testaudiostream.py │ │ ├── testmicrophone.py │ │ ├── testtexttoaudio.py │ │ ├── testtexttospeech.py │ │ └── testtranscription.py │ ├── testdata/ │ │ ├── __init__.py │ │ ├── testfiletohtml.py │ │ ├── testtabular.py │ │ ├── testtextractor.py │ │ └── testtokenizer.py │ ├── testimage/ │ │ ├── __init__.py │ │ ├── testcaption.py │ │ ├── testimagehash.py │ │ └── testobjects.py │ ├── testllm/ │ │ ├── __init__.py │ │ ├── testgenerator.py │ │ ├── testlitellm.py │ │ ├── 
testllama.py │ │ ├── testllm.py │ │ ├── testopencode.py │ │ ├── testrag.py │ │ └── testsequences.py │ ├── testtext/ │ │ ├── __init__.py │ │ ├── testentity.py │ │ ├── testlabels.py │ │ ├── testreranker.py │ │ ├── testsimilarity.py │ │ ├── testsummary.py │ │ └── testtranslation.py │ └── testtrain/ │ ├── __init__.py │ ├── testonnx.py │ ├── testquantization.py │ └── testtrainer.py ├── testscoring/ │ ├── __init__.py │ ├── testkeyword.py │ └── testsparse.py ├── testserialize.py ├── testvectors/ │ ├── __init__.py │ ├── testdense/ │ │ ├── __init__.py │ │ ├── testcustom.py │ │ ├── testexternal.py │ │ ├── testhuggingface.py │ │ ├── testlitellm.py │ │ ├── testllama.py │ │ ├── testm2v.py │ │ ├── testsbert.py │ │ ├── testvectors.py │ │ └── testwordvectors.py │ └── testsparse/ │ ├── __init__.py │ ├── testsbert.py │ └── testvectors.py ├── testworkflow.py └── utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .coveragerc ================================================ [run] source = src/python concurrency = multiprocessing,thread disable_warnings = no-data-collected omit = **/__main__.py [combine] disable_warnings = no-data-collected ================================================ FILE: .github/workflows/build.yml ================================================ # GitHub Actions build workflow name: build on: ["push", "pull_request"] jobs: build: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] timeout-minutes: 60 steps: - name: Checkout code uses: actions/checkout@v6 - name: Install Python uses: actions/setup-python@v6 with: python-version: "3.10" - name: Install Java uses: actions/setup-java@v5 with: distribution: "zulu" java-version: 21 - name: Install dependencies - Linux run: | sudo apt-get update sudo apt-get install libportaudio2 libsndfile1 sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc 
if: matrix.os == 'ubuntu-latest' - name: Install dependencies - macOS run: | echo "PYTORCH_MPS_DISABLE=1" >> $GITHUB_ENV echo "LLAMA_NO_METAL=1" >> $GITHUB_ENV echo "TIKA_STARTUP_SLEEP=30" >> $GITHUB_ENV echo "TIKA_STARTUP_MAX_RETRY=10" >> $GITHUB_ENV brew install portaudio sudo xcode-select -s "/Applications/Xcode_16.app" if: matrix.os == 'macos-latest' - name: Install dependencies - Windows run: | "PYTHONIOENCODING=utf-8" >> $env:GITHUB_ENV choco install wget if: matrix.os == 'windows-latest' - name: Build run: | pip install -U wheel pip install .[all,dev] pip cache purge python -c "import nltk; nltk.download(['punkt', 'punkt_tab', 'averaged_perceptron_tagger_eng'])" python --version make data coverage env: HF_HUB_ETAG_TIMEOUT: 100 HF_HUB_DOWNLOAD_TIMEOUT: 100 HF_XET_CHUNK_CACHE_SIZE_BYTES: 0 - uses: pre-commit/action@v3.0.1 if: matrix.os == 'ubuntu-latest' - name: Test Coverage run: coveralls --service=github if: matrix.os == 'ubuntu-latest' env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/docs.yml ================================================ name: docs on: push: branches: - master jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: python-version: "3.10" - run: | pip install -U pip wheel pip install .[all,dev] - run: mkdocs gh-deploy --force ================================================ FILE: .github/workflows/minimal.yml ================================================ # GitHub Actions minimal build name: minimal on: ["push", "pull_request"] jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: python-version: "3.10" - run: | pip install -U pip wheel pip install . 
python -c "import txtai" ================================================ FILE: .gitignore ================================================ build/ dist/ docker/**/*.yml htmlcov/ *egg-info/ __pycache__/ .coverage .coverage.* *.pyc .vscode/ ================================================ FILE: .pre-commit-config.yaml ================================================ repos: - repo: https://github.com/pycqa/pylint rev: v3.3.1 hooks: - id: pylint args: - -d import-error - -d duplicate-code - -d too-many-positional-arguments - repo: https://github.com/ambv/black rev: 24.10.0 hooks: - id: black language_version: python3 ================================================ FILE: .pylintrc ================================================ [BASIC] module-rgx=[a-z_][a-zA-Z0-9_]{2,30}$ method-rgx=[a-z_][a-zA-Z0-9_]{2,30}$ function-rgx=[a-z_][a-zA-Z0-9_]{2,30}$ argument-rgx=[a-z_][a-zA-Z0-9_]{0,30}$ variable-rgx=[a-z_][a-zA-Z0-9_]{0,30}$ attr-rgx=[a-z_][a-zA-Z0-9_]{0,30}$ [DESIGN] max-args=10 max-locals=40 max-returns=10 max-attributes=20 min-public-methods=0 [FORMAT] max-line-length=150 ================================================ FILE: CITATION.cff ================================================ cff-version: 1.2.0 date-released: 2020-08-11 message: "If you use this software, please cite it as below." title: "txtai: the all-in-one AI framework" abstract: "txtai is an all-in-one open-source AI framework for semantic search, LLM orchestration and language model workflows" url: "https://github.com/neuml/txtai" authors: - family-names: "Mezzetti" given-names: "David" affiliation: NeuML license: Apache-2.0 ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
END OF TERMS AND CONDITIONS Copyright 2020- NeuML LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: Makefile ================================================ # Project utility scripts .PHONY: test # Setup environment export SRC_DIR := ./src/python export TEST_DIR := ./test/python export PYTHONPATH := ${SRC_DIR}:${TEST_DIR}:${PYTHONPATH} export PATH := ${TEST_DIR}:${PATH} export PYTHONWARNINGS := ignore # Disable tokenizer parallelism for tests export TOKENIZERS_PARALLELISM := false # Default python executable if not provided PYTHON ?= python # Check for wget WGET := $(shell wget --version 2> /dev/null) ifndef WGET $(error "Required binary `wget` not found, please install wget OS package") endif # Download test data data: mkdir -p /tmp/txtai wget -N https://github.com/neuml/txtai/releases/download/v6.2.0/tests.tar.gz -P /tmp tar -xvzf /tmp/tests.tar.gz -C /tmp # Unit tests test: ${PYTHON} -m unittest discover -v -s ${TEST_DIR} # Run tests while calculating code coverage coverage: coverage run -m unittest discover -v -k testagent -s ${TEST_DIR} coverage run -m unittest discover -v -k testann -s ${TEST_DIR} coverage run -m unittest discover -v -k testapi -s ${TEST_DIR} coverage run -m unittest discover -v -k testapp -s ${TEST_DIR} coverage run -m unittest discover -v -k testarchive -s ${TEST_DIR} coverage run -m unittest discover -v -k testcloud -s ${TEST_DIR} coverage run -m unittest discover -v -k testconsole -s ${TEST_DIR} coverage run -m unittest 
discover -v -k testdatabase -s ${TEST_DIR} coverage run -m unittest discover -v -k testembeddings -s ${TEST_DIR} coverage run -m unittest discover -v -k testgraph -s ${TEST_DIR} coverage run -m unittest discover -v -k testmodels -s ${TEST_DIR} coverage run -m unittest discover -v -k testoptional -s ${TEST_DIR} coverage run -m unittest discover -v -k testpipeline.testaudio -s ${TEST_DIR} coverage run -m unittest discover -v -k testpipeline.testdata -s ${TEST_DIR} coverage run -m unittest discover -v -k testpipeline.testimage -s ${TEST_DIR} coverage run -m unittest discover -v -k testpipeline.testllm -s ${TEST_DIR} coverage run -m unittest discover -v -k testpipeline.testtext -s ${TEST_DIR} coverage run -m unittest discover -v -k testpipeline.testtrain -s ${TEST_DIR} coverage run -m unittest discover -v -k testscoring -s ${TEST_DIR} coverage run -m unittest discover -v -k testserialize -s ${TEST_DIR} coverage run -m unittest discover -v -k testvectors -s ${TEST_DIR} coverage run -m unittest discover -v -k testworkflow -s ${TEST_DIR} coverage combine ================================================ FILE: README.md ================================================

All-in-one AI framework

Version GitHub last commit GitHub issues Join Slack Build Status Coverage Status

txtai is an all-in-one AI framework for semantic search, LLM orchestration and language model workflows. ![architecture](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/architecture.png#gh-light-mode-only) ![architecture](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/architecture-dark.png#gh-dark-mode-only) The key component of txtai is an embeddings database, which is a union of vector indexes (sparse and dense), graph networks and relational databases. This foundation enables vector search and/or serves as a powerful knowledge source for large language model (LLM) applications. Build autonomous agents, retrieval augmented generation (RAG) processes, multi-model workflows and more. Summary of txtai features: - 🔎 Vector search with SQL, object storage, topic modeling, graph analysis and multimodal indexing - 📄 Create embeddings for text, documents, audio, images and video - 💡 Pipelines powered by language models that run LLM prompts, question-answering, labeling, transcription, translation, summarization and more - ↪️️ Workflows to join pipelines together and aggregate business logic. txtai processes can be simple microservices or multi-model workflows. - 🤖 Agents that intelligently connect embeddings, pipelines, workflows and other agents together to autonomously solve complex problems - ⚙️ Web and Model Context Protocol (MCP) APIs. Bindings available for [JavaScript](https://github.com/neuml/txtai.js), [Java](https://github.com/neuml/txtai.java), [Rust](https://github.com/neuml/txtai.rs) and [Go](https://github.com/neuml/txtai.go). - 🔋 Batteries included with defaults to get up and running fast - ☁️ Run local or scale out with container orchestration txtai is built with Python 3.10+, [Hugging Face Transformers](https://github.com/huggingface/transformers), [Sentence Transformers](https://github.com/UKPLab/sentence-transformers) and [FastAPI](https://github.com/tiangolo/fastapi). txtai is open-source under an Apache 2.0 license. 
> [!NOTE] > > [NeuML](https://neuml.com) is the company behind txtai and we provide AI consulting services around our stack. [Schedule a meeting](https://cal.com/neuml/intro) or [send a message](mailto:info@neuml.com) to learn more. > > We're also building an easy and secure way to run hosted txtai applications with [txtai.cloud](https://txtai.cloud). ## Why txtai? ![why](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/why.png#gh-light-mode-only) ![why](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/why-dark.png#gh-dark-mode-only) New vector databases, LLM frameworks and everything in between are sprouting up daily. Why build with txtai? - Up and running in minutes with [pip](https://neuml.github.io/txtai/install/) or [Docker](https://neuml.github.io/txtai/cloud/) ```python # Get started in a couple lines import txtai embeddings = txtai.Embeddings() embeddings.index(["Correct", "Not what we hoped"]) embeddings.search("positive", 1) #[(0, 0.29862046241760254)] ``` - Built-in API makes it easy to develop applications using your programming language of choice ```yaml # app.yml embeddings: path: sentence-transformers/all-MiniLM-L6-v2 ``` ```bash CONFIG=app.yml uvicorn "txtai.api:app" curl -X GET "http://localhost:8000/search?query=positive" ``` - Run local - no need to ship data off to disparate remote services - Work with micromodels all the way up to large language models (LLMs) - Low footprint - install additional dependencies and scale up when needed - [Learn by example](https://neuml.github.io/txtai/examples) - notebooks cover all available functionality ## Use Cases The following sections introduce common txtai use cases. A comprehensive set of over 70 [example notebooks and applications](https://neuml.github.io/txtai/examples) are also available. ### Semantic Search Build semantic/similarity/vector/neural search applications. 
![demo](https://raw.githubusercontent.com/neuml/txtai/master/demo.gif) Traditional search systems use keywords to find data. Semantic search has an understanding of natural language and identifies results that have the same meaning, not necessarily the same keywords. ![search](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/search.png#gh-light-mode-only) ![search](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/search-dark.png#gh-dark-mode-only) Get started with the following examples. | Notebook | Description | | |:----------|:-------------|------:| | [Introducing txtai](https://github.com/neuml/txtai/blob/master/examples/01_Introducing_txtai.ipynb) [▶️](https://www.youtube.com/watch?v=SIezMnVdmMs) | Overview of the functionality provided by txtai | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/01_Introducing_txtai.ipynb) | | [Similarity search with images](https://github.com/neuml/txtai/blob/master/examples/13_Similarity_search_with_images.ipynb) | Embed images and text into the same space for search | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/13_Similarity_search_with_images.ipynb) | | [Build a QA database](https://github.com/neuml/txtai/blob/master/examples/34_Build_a_QA_database.ipynb) | Question matching with semantic search | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/34_Build_a_QA_database.ipynb) | | [Semantic Graphs](https://github.com/neuml/txtai/blob/master/examples/38_Introducing_the_Semantic_Graph.ipynb) | Explore topics, data connectivity and run network analysis| [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/38_Introducing_the_Semantic_Graph.ipynb) | ### LLM Orchestration Autonomous agents, retrieval augmented generation (RAG), chat with your data, pipelines and workflows that interface with large language models (LLMs). ![llm](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/llm.png) See below to learn more. | Notebook | Description | | |:----------|:-------------|------:| | [Prompt templates and task chains](https://github.com/neuml/txtai/blob/master/examples/44_Prompt_templates_and_task_chains.ipynb) | Build model prompts and connect tasks together with workflows | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/44_Prompt_templates_and_task_chains.ipynb) | | [Integrate LLM frameworks](https://github.com/neuml/txtai/blob/master/examples/53_Integrate_LLM_Frameworks.ipynb) | Integrate llama.cpp, LiteLLM and custom generation frameworks | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/53_Integrate_LLM_Frameworks.ipynb) | | [Build knowledge graphs with LLMs](https://github.com/neuml/txtai/blob/master/examples/57_Build_knowledge_graphs_with_LLM_driven_entity_extraction.ipynb) | Build knowledge graphs with LLM-driven entity extraction | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/57_Build_knowledge_graphs_with_LLM_driven_entity_extraction.ipynb) | | [Parsing the stars with txtai](https://github.com/neuml/txtai/blob/master/examples/72_Parsing_the_stars_with_txtai.ipynb) | Explore an astronomical knowledge graph of known stars, planets, galaxies | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/72_Parsing_the_stars_with_txtai.ipynb) | #### Agents Agents connect embeddings, pipelines, workflows and other agents together to autonomously solve complex problems. ![agent](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/agent.png) txtai agents are built on top of the [smolagents](https://github.com/huggingface/smolagents) framework. This supports all LLMs txtai supports (Hugging Face, llama.cpp, OpenAI / Claude / AWS Bedrock via LiteLLM). Agent prompting with [`agents.md`](https://github.com/agentsmd/agents.md) and [`skill.md`](https://agentskills.io/specification) are also supported. Check out this [Agent Quickstart Example](https://github.com/neuml/txtai/blob/master/examples/agent_quickstart.py). Additional examples are listed below. | Notebook | Description | | |:----------|:-------------|------:| | [Granting autonomy to agents](https://github.com/neuml/txtai/blob/master/examples/69_Granting_autonomy_to_agents.ipynb) | Agents that iteratively solve problems as they see fit | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/69_Granting_autonomy_to_agents.ipynb) | | [TxtAI got skills](https://github.com/neuml/txtai/blob/master/examples/83_TxtAI_got_skills.ipynb) | Integrate skill.md files with your agent | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/83_TxtAI_got_skills.ipynb) | | [Agent Tools](https://github.com/neuml/txtai/blob/master/examples/84_Agent_Tools.ipynb) [▶️](https://www.youtube.com/watch?v=RDNaFXQy3GQ) | Learn about the txtai agent toolkit | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/84_Agent_Tools.ipynb) | | [Analyzing LinkedIn Company Posts with Graphs and Agents](https://github.com/neuml/txtai/blob/master/examples/71_Analyzing_LinkedIn_Company_Posts_with_Graphs_and_Agents.ipynb) | Exploring how to improve social media engagement with AI | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/71_Analyzing_LinkedIn_Company_Posts_with_Graphs_and_Agents.ipynb) | #### Retrieval augmented generation Retrieval augmented generation (RAG) reduces the risk of LLM hallucinations by constraining the output with a knowledge base as context. RAG is commonly used to "chat with your data". ![rag](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/rag.png#gh-light-mode-only) ![rag](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/rag-dark.png#gh-dark-mode-only) Check out this [RAG Quickstart Example](https://github.com/neuml/txtai/blob/master/examples/rag_quickstart.py). Additional examples are listed below. 
| Notebook | Description | | |:----------|:-------------|------:| | [Build RAG pipelines with txtai](https://github.com/neuml/txtai/blob/master/examples/52_Build_RAG_pipelines_with_txtai.ipynb) [▶️](https://www.youtube.com/watch?v=t_OeAc8NVfQ) | Guide on retrieval augmented generation including how to create citations | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/52_Build_RAG_pipelines_with_txtai.ipynb) | | [RAG is more than Vector Search](https://github.com/neuml/txtai/blob/master/examples/79_RAG_is_more_than_Vector_Search.ipynb) | Context retrieval via Web, SQL and other sources | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/79_RAG_is_more_than_Vector_Search.ipynb) | | [GraphRAG with Wikipedia and GPT OSS](https://github.com/neuml/txtai/blob/master/examples/77_GraphRAG_with_Wikipedia_and_GPT_OSS.ipynb) | Deep graph search powered RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/77_GraphRAG_with_Wikipedia_and_GPT_OSS.ipynb) | | [Speech to Speech RAG](https://github.com/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) [▶️](https://www.youtube.com/watch?v=tH8QWwkVMKA) | Full cycle speech to speech workflow with RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) | ### Language Model Workflows Language model workflows, also known as semantic workflows, connect language models together to build intelligent applications. 
![flows](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/flows.png#gh-light-mode-only) ![flows](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/flows-dark.png#gh-dark-mode-only) While LLMs are powerful, there are plenty of smaller, more specialized models that work better and faster for specific tasks. This includes models for extractive question-answering, automatic summarization, text-to-speech, transcription and translation. Check out this [Workflow Quickstart Example](https://github.com/neuml/txtai/blob/master/examples/workflow_quickstart.py). Additional examples are listed below. | Notebook | Description | | |:----------|:-------------|------:| | [Run pipeline workflows](https://github.com/neuml/txtai/blob/master/examples/14_Run_pipeline_workflows.ipynb) [▶️](https://www.youtube.com/watch?v=UBMPDCn1gEU) | Simple yet powerful constructs to efficiently process data | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/14_Run_pipeline_workflows.ipynb) | | [Building abstractive text summaries](https://github.com/neuml/txtai/blob/master/examples/09_Building_abstractive_text_summaries.ipynb) | Run abstractive text summarization | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/09_Building_abstractive_text_summaries.ipynb) | | [Transcribe audio to text](https://github.com/neuml/txtai/blob/master/examples/11_Transcribe_audio_to_text.ipynb) | Convert audio files to text | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/11_Transcribe_audio_to_text.ipynb) | | [Translate text between languages](https://github.com/neuml/txtai/blob/master/examples/12_Translate_text_between_languages.ipynb) | Streamline machine translation and language 
detection | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/12_Translate_text_between_languages.ipynb) | ## Installation ![install](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/install.png#gh-light-mode-only) ![install](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/install-dark.png#gh-dark-mode-only) The easiest way to install is via pip and PyPI ``` pip install txtai ``` Python 3.10+ is supported. Using a Python [virtual environment](https://docs.python.org/3/library/venv.html) is recommended. See the detailed [install instructions](https://neuml.github.io/txtai/install) for more information covering [optional dependencies](https://neuml.github.io/txtai/install/#optional-dependencies), [environment specific prerequisites](https://neuml.github.io/txtai/install/#environment-specific-prerequisites), [installing from source](https://neuml.github.io/txtai/install/#install-from-source), [conda support](https://neuml.github.io/txtai/install/#conda) and how to [run with containers](https://neuml.github.io/txtai/cloud). ## Model guide ![models](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/models.png) See the table below for the current recommended models. These models all allow commercial use and offer a blend of speed and performance. 
| Component | Model(s) | | ----------------------------------------------------------------------------- | ------------------------------------------------------------------------ | | [Embeddings](https://neuml.github.io/txtai/embeddings) | [all-MiniLM-L6-v2](https://hf.co/sentence-transformers/all-MiniLM-L6-v2) | | [Image Captions](https://neuml.github.io/txtai/pipeline/image/caption) | [BLIP](https://hf.co/Salesforce/blip-image-captioning-base) | | [Labels - Zero Shot](https://neuml.github.io/txtai/pipeline/text/labels) | [BART-Large-MNLI](https://hf.co/facebook/bart-large-mnli) | | [Labels - Fixed](https://neuml.github.io/txtai/pipeline/text/labels) | Fine-tune with [training pipeline](https://neuml.github.io/txtai/pipeline/train/trainer) | | [Large Language Model (LLM)](https://neuml.github.io/txtai/pipeline/llm/llm) | [gpt-oss-20b](https://huggingface.co/openai/gpt-oss-20b) | | [Summarization](https://neuml.github.io/txtai/pipeline/text/summary) | [DistilBART](https://hf.co/sshleifer/distilbart-cnn-12-6) | | [Text-to-Speech](https://neuml.github.io/txtai/pipeline/audio/texttospeech) | [ESPnet JETS](https://hf.co/NeuML/ljspeech-jets-onnx) | | [Transcription](https://neuml.github.io/txtai/pipeline/audio/transcription) | [Whisper](https://hf.co/openai/whisper-base) | | [Translation](https://neuml.github.io/txtai/pipeline/text/translation) | [OPUS Model Series](https://hf.co/Helsinki-NLP) | Models can be loaded as either a path from the Hugging Face Hub or a local directory. Model paths are optional, defaults are loaded when not specified. For tasks with no recommended model, txtai uses the default models as shown in the Hugging Face Tasks guide. See the following links to learn more. 
- [Hugging Face Tasks](https://hf.co/tasks) - [Hugging Face Model Hub](https://hf.co/models) - [MTEB Leaderboard](https://hf.co/spaces/mteb/leaderboard) - [LMSYS LLM Leaderboard](https://chat.lmsys.org/?leaderboard) - [Open LLM Leaderboard](https://hf.co/spaces/HuggingFaceH4/open_llm_leaderboard) ## Powered by txtai The following applications are powered by txtai. ![apps](https://raw.githubusercontent.com/neuml/txtai/master/apps.jpg) | Application | Description | |:------------ |:-------------| | [rag](https://github.com/neuml/rag) | Retrieval Augmented Generation (RAG) application | | [ncoder](https://github.com/neuml/ncoder) | Open-Source AI coding agent | | [paperai](https://github.com/neuml/paperai) | AI for medical and scientific papers | | [annotateai](https://github.com/neuml/annotateai) | Automatically annotate papers with LLMs | In addition to this list, there are also many other [open-source projects](https://github.com/neuml/txtai/network/dependents), [published research](https://scholar.google.com/scholar?q=txtai&hl=en&as_ylo=2022) and closed proprietary/commercial projects that have built on txtai in production. 
## Further Reading ![further](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/further.png#gh-light-mode-only) ![further](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/further-ghdark.png#gh-dark-mode-only) - [Introducing txtai, the all-in-one AI framework](https://medium.com/neuml/introducing-txtai-the-all-in-one-ai-framework-0660ecfc39d7) - [Tutorial series on Hashnode](https://neuml.hashnode.dev/series/txtai-tutorial) | [dev.to](https://dev.to/neuml/tutorial-series-on-txtai-ibg) - [What's new in txtai 9.0](https://medium.com/neuml/whats-new-in-txtai-9-0-d522bb150afa) | [8.0](https://medium.com/neuml/whats-new-in-txtai-8-0-2d7d0ab4506b) | [7.0](https://medium.com/neuml/whats-new-in-txtai-7-0-855ad6a55440) | [6.0](https://medium.com/neuml/whats-new-in-txtai-6-0-7d93eeedf804) | [5.0](https://medium.com/neuml/whats-new-in-txtai-5-0-e5c75a13b101) | [4.0](https://medium.com/neuml/whats-new-in-txtai-4-0-bbc3a65c3d1c) - [Getting started with semantic search](https://medium.com/neuml/getting-started-with-semantic-search-a9fd9d8a48cf) | [workflows](https://medium.com/neuml/getting-started-with-semantic-workflows-2fefda6165d9) | [rag](https://medium.com/neuml/getting-started-with-rag-9a0cca75f748) - [Running txtai at scale](https://medium.com/neuml/running-at-scale-with-txtai-71196cdd99f9) - [Vector search & RAG Landscape: A review with txtai](https://medium.com/neuml/vector-search-rag-landscape-a-review-with-txtai-a7f37ad0e187) ## Documentation [Full documentation on txtai](https://neuml.github.io/txtai) including configuration settings for embeddings, pipelines, workflows, API and a FAQ with common questions/issues is available. ## Contributing For those who would like to contribute to txtai, please see [this guide](https://github.com/neuml/.github/blob/master/CONTRIBUTING.md). 
def handler(event, context):
    """
    Lambda entry point that executes a named txtai workflow.

    The event carries a JSON-encoded `body` holding a `name` field (workflow to run)
    and an `elements` field (inputs passed to that workflow).

    Args:
        event: input event, `event["body"]` is parsed as JSON
        context: input context (not read by this handler)

    Returns:
        dict with a 200 status code, a JSON content type header and the list of workflow results as the body
    """

    global APP

    # Build the application on first use, later calls reuse the instance stored in the module global
    if not APP:
        APP = API("config.yml")

    # Decode request parameters from the event body
    params = json.loads(event["body"])

    # Execute the workflow, list() consumes the workflow output
    results = list(APP.workflow(params["name"], params["elements"]))

    return {
        "statusCode": 200,
        "headers": {"Content-Type": "application/json"},
        "body": results,
    }
docker/schedule/Dockerfile ================================================ # Set base image ARG BASE_IMAGE=neuml/txtai-cpu FROM $BASE_IMAGE # Copy configuration COPY config.yml . # Run local API instance to cache models in container RUN python -c "from txtai.api import API; API('config.yml', False)" # Start application and wait for completion. Scheduled workflows can run indefinitely. ENTRYPOINT ["python", "-c", "from txtai.api import API; API('config.yml').wait()"] ================================================ FILE: docker/workflow/Dockerfile ================================================ # Set base image ARG BASE_IMAGE=neuml/txtai-cpu FROM $BASE_IMAGE # Copy configuration COPY config.yml . # Run local API instance to cache models in container RUN python -c "from txtai.api import API; API('config.yml', False)" # Run workflow. Requires two command line arguments: name of workflow and input elements ENTRYPOINT ["python", "-c", "import sys; from txtai.api import API\nfor _ in API('config.yml').workflow(sys.argv[1], sys.argv[2:]): pass"] CMD ["workflow"] ================================================ FILE: docs/agent/configuration.md ================================================ # Configuration An agent takes two main arguments, an LLM and a list of tools. The txtai agent framework is built with [smolagents](https://github.com/huggingface/smolagents). Additional options can be passed in the `Agent` constructor. 
```python from datetime import datetime from txtai import Agent wikipedia = { "name": "wikipedia", "description": "Searches a Wikipedia database", "provider": "huggingface-hub", "container": "neuml/txtai-wikipedia" } arxiv = { "name": "arxiv", "description": "Searches a database of scientific papers", "provider": "huggingface-hub", "container": "neuml/txtai-arxiv" } def today() -> str: """ Gets the current date and time Returns: current date and time """ return datetime.today().isoformat() agent = Agent( model="Qwen/Qwen3-4B-Instruct-2507", tools=[today, wikipedia, arxiv, "websearch"], ) ``` ## model ```yaml model: string|llm instance ``` LLM model path or LLM pipeline instance. The `llm` parameter is also supported for backwards compatibility. See the [LLM pipeline](../../pipeline/llm/llm) for more information. ## tools ```yaml tools: list ``` List of tools to supply to the agent. Supports the following configurations. ### function A function tool takes the following dictionary fields. | Field | Description | |:------------|:-------------------------| | name | name of the tool | | description | tool description | | target | target method / callable | A function or callable method can also be directly supplied in the `tools` list. In this case, the fields are inferred from the method documentation. ### embeddings Embeddings indexes have built-in support. Provide the following dictionary configuration to add an embeddings index as a tool. | Field | Description | |:------------|:-------------------------------------------| | name | embeddings index name | | description | embeddings index description | | **kwargs | Parameters to pass to [embeddings.load](../../embeddings/methods/#txtai.embeddings.Embeddings.load) | ### tool The following shortcut strings load tools directly. Passing a Tool instance is also supported. 
| Tool | Description | |:------------|:----------------------------------------------------------| | bash | Runs a shell command through subprocess | | defaults | Loads all of these tools as the default toolkit | | edit | Edits a file in place and returns a diff | | glob | Finds matching file patterns in a directory | | grep | Finds matching file content in a directory | | http.* | HTTP Path to a [Model Context Protocol (MCP)](https://modelcontextprotocol.io/docs/getting-started/intro) server | | python | Runs a Python action | | read | Reads file or url content, supports text extraction | | todowrite | Generates a task list to organize complex tasks | | websearch | Runs a websearch using the built-in websearch tool | | webview | Extracts content from a web page. Alias for `read` tool | | write | Writes content to file | | *.md | Loads a [`skill.md`](https://agentskills.io/specification) file | ## instructions ```yaml instructions: string|path ``` Supports loading an `agents.md` file. Can be provided directly as a string or as a path to a file. [Read more about agents.md here](https://github.com/agentsmd/agents.md) ## template ```yaml template: string ``` Customize the prompt template used by this agent. Supports Jinja templates. Uses a default template when this parameter is not provided. Must include `{{ text }}` and `{{ memory }}` placeholders. ## memory ```yaml memory: int ``` Keeps a rolling window of `memory` inputs and outputs. These are added to future prompts and serve as "agent memory". Supports storing memory by `session` to enable multiple conversation threads. Defaults to shared memory when not set. See the [method documentation](../methods#txtai.agent.base.Agent.__call__) for more information. ## method ```yaml method: code|tool ``` Sets the agent method. Supports either a `code` or `tool` (default) calling agent. A code agent generates Python code and executes that. A tool calling agent generates JSON blocks and calls the agents within those blocks. 
Additional options can be directly passed. See [CodeAgent](https://huggingface.co/docs/smolagents/main/en/reference/agents#smolagents.CodeAgent) or [ToolCallingAgent](https://huggingface.co/docs/smolagents/main/en/reference/agents#smolagents.ToolCallingAgent) for a list of parameters. [Read more here](https://huggingface.co/docs/smolagents/main/en/guided_tour). ================================================ FILE: docs/agent/index.md ================================================ # Agent ![agent](../images/agent.png) An agent automatically creates workflows to answer multi-faceted user requests. Agents iteratively prompt and/or interface with tools to step through a process and ultimately come to an answer for a request. Agent prompting with [`agents.md`](https://github.com/agentsmd/agents.md) and [`skill.md`](https://agentskills.io/specification) is also supported. [Read the configuration](./configuration/#tool) for more on how to set those up. Agents excel at complex tasks where multiple tools and/or methods are required. They incorporate a level of randomness similar to different people working on the same task. When the request is simple and/or there is a rule-based process, other methods such as RAG and Workflows should be explored. The following code snippet defines a basic agent. 
```python from datetime import datetime from txtai import Agent wikipedia = { "name": "wikipedia", "description": "Searches a Wikipedia database", "provider": "huggingface-hub", "container": "neuml/txtai-wikipedia" } arxiv = { "name": "arxiv", "description": "Searches a database of scientific papers", "provider": "huggingface-hub", "container": "neuml/txtai-arxiv" } def today() -> str: """ Gets the current date and time Returns: current date and time """ return datetime.today().isoformat() agent = Agent( model="Qwen/Qwen3-4B-Instruct-2507", tools=[today, wikipedia, arxiv, "websearch"], max_steps=10, ) ``` The agent above has access to two embeddings databases (Wikipedia and ArXiv) and the web. Given the user's input request, the agent decides the best tool to solve the task. ## Example The first example will solve a problem with multiple data points. See below. ```python agent("Which city has the highest population, Boston or New York?") ``` This requires looking up the population of each city before knowing how to answer the question. Multiple search requests are run to generate a final answer. ## Agentic RAG Standard retrieval augmented generation (RAG) runs a single vector search to obtain a context and builds a prompt with the context + input question. Agentic RAG is a more complex process that goes through multiple iterations. It can also utilize multiple databases to come to a final conclusion. The example below aggregates information from multiple sources and builds a report on a topic. ```python researcher = """ You're an expert researcher looking to write a paper on {topic}. Search for websites, scientific papers and Wikipedia related to the topic. Write a report with summaries and references (with hyperlinks). Write the text as Markdown. """ agent(researcher.format(topic="alien life")) ``` ## Agent Teams Agents can also be tools. This enables the concept of building "Agent Teams" to solve problems. 
The previous example can be rewritten as a list of agents. ```python from txtai import Agent, LLM llm = LLM("Qwen/Qwen3-4B-Instruct-2507") websearcher = Agent( model=llm, tools=["websearch"], ) wikiman = Agent( model=llm, tools=[{ "name": "wikipedia", "description": "Searches a Wikipedia database", "provider": "huggingface-hub", "container": "neuml/txtai-wikipedia" }], ) researcher = Agent( model=llm, tools=[{ "name": "arxiv", "description": "Searches a database of scientific papers", "provider": "huggingface-hub", "container": "neuml/txtai-arxiv" }], ) agent = Agent( model=llm, tools=[{ "name": "websearcher", "description": "I run web searches, there is no answer a web search can't solve!", "target": websearcher }, { "name": "wikiman", "description": "Wikipedia has all the answers, I search Wikipedia and answer questions", "target": wikiman }, { "name": "researcher", "description": "I'm a science guy. I search arXiv to get all my answers.", "target": researcher }], max_steps=10 ) ``` This provides another level of intelligence to the process. Instead of just a single tool execution, each agent-tool combination has its own reasoning engine. ```python agent(""" Research fundamental concepts about Signal Processing and build a comprehensive report. Write the output in Markdown. """) ``` # More examples Check out this [Agent Quickstart Example](https://github.com/neuml/txtai/blob/master/examples/agent_quickstart.py). Additional examples are listed below. 
| Notebook | Description | | |:----------|:-------------|------:| | [What's new in txtai 8.0](https://github.com/neuml/txtai/blob/master/examples/67_Whats_new_in_txtai_8_0.ipynb) | Agents with txtai | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/67_Whats_new_in_txtai_8_0.ipynb) | | [Analyzing Hugging Face Posts with Graphs and Agents](https://github.com/neuml/txtai/blob/master/examples/68_Analyzing_Hugging_Face_Posts_with_Graphs_and_Agents.ipynb) | Explore a rich dataset with Graph Analysis and Agents | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/68_Analyzing_Hugging_Face_Posts_with_Graphs_and_Agents.ipynb) | | [Granting autonomy to agents](https://github.com/neuml/txtai/blob/master/examples/69_Granting_autonomy_to_agents.ipynb) | Agents that iteratively solve problems as they see fit | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/69_Granting_autonomy_to_agents.ipynb) | | [Analyzing LinkedIn Company Posts with Graphs and Agents](https://github.com/neuml/txtai/blob/master/examples/71_Analyzing_LinkedIn_Company_Posts_with_Graphs_and_Agents.ipynb) | Exploring how to improve social media engagement with AI | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/71_Analyzing_LinkedIn_Company_Posts_with_Graphs_and_Agents.ipynb) | | [Parsing the stars with txtai](https://github.com/neuml/txtai/blob/master/examples/72_Parsing_the_stars_with_txtai.ipynb) | Explore an astronomical knowledge graph of known stars, planets, galaxies | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/72_Parsing_the_stars_with_txtai.ipynb) | | [Agentic College Search](https://github.com/neuml/txtai/blob/master/examples/82_Agentic_College_Search.ipynb) | Identify list of strong engineering colleges | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/82_Agentic_College_Search.ipynb) | | [TxtAI got skills](https://github.com/neuml/txtai/blob/master/examples/83_TxtAI_got_skills.ipynb) | Integrate skill.md files with your agent | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/83_TxtAI_got_skills.ipynb) | | [Agent Tools](https://github.com/neuml/txtai/blob/master/examples/84_Agent_Tools.ipynb) [▶️](https://www.youtube.com/watch?v=RDNaFXQy3GQ) | Learn about the txtai agent toolkit | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/84_Agent_Tools.ipynb) | ================================================ FILE: docs/agent/methods.md ================================================ # Methods ## ::: txtai.agent.base.Agent.__init__ ## ::: txtai.agent.base.Agent.__call__ ================================================ FILE: docs/api/cluster.md ================================================ # Distributed embeddings clusters The API supports combining multiple API instances into a single logical embeddings index. An example configuration is shown below. ```yaml cluster: shards: - http://127.0.0.1:8002 - http://127.0.0.1:8003 ``` This configuration aggregates the API instances above as index shards. Data is evenly split among each of the shards at index time. 
Queries are run in parallel against each shard and the results are joined together. This method allows horizontal scaling and supports very large index clusters. This method is only recommended for data sets with 1 billion+ records. The ANN libraries can easily support smaller data sizes and this method is not worth the additional complexity. At this time, new shards cannot be added after building the initial index. See the link below for a detailed example covering distributed embeddings clusters. | Notebook | Description | | |:----------|:-------------|------:| | [Distributed embeddings cluster](https://github.com/neuml/txtai/blob/master/examples/15_Distributed_embeddings_cluster.ipynb) | Distribute an embeddings index across multiple data nodes | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/15_Distributed_embeddings_cluster.ipynb) | ================================================ FILE: docs/api/configuration.md ================================================ # Configuration Configuration is set through YAML. In most cases, YAML keys map to field names in Python. The [example in the previous section](../) gave a full-featured example covering a wide array of configuration options. Each section below describes the available configuration settings. ## Embeddings The configuration parser expects a top level `embeddings` key to be present in the YAML. All [embeddings configuration](../../embeddings/configuration) is supported. The following example defines an embeddings index. ```yaml path: index path writable: true embeddings: path: vector model content: true ``` Three top level settings are available to control where indexes are saved and if an index is a read-only index. ### path ```yaml path: string ``` Path to save and load the embeddings index. Each API instance can only access a single index at a time.
### writable ```yaml writable: boolean ``` Determines if the input embeddings index is writable (true) or read-only (false). This allows serving a read-only index. ### cloud [Cloud storage settings](../../embeddings/configuration/cloud) can be set under a `cloud` top level configuration group. ## Agent Agents are defined under a top level `agent` key. Each key under the `agent` key is the name of the agent. Constructor parameters can be passed under this key. The following example defines an agent. ```yaml agent: researcher: tools: - websearch llm: path: Qwen/Qwen3-4B-Instruct-2507 ``` ## Pipeline Pipelines are loaded as top level configuration parameters. Pipeline names are automatically detected in the YAML configuration and created upon startup. All [pipelines](../../pipeline) are supported. The following example defines a series of pipelines. Note that entries below are the lower-case names of the pipeline class. ```yaml caption: extractor: path: model path labels: summary: tabular: translation: ``` Under each pipeline name, configuration settings for the pipeline can be set. ## Workflow Workflows are defined under a top level `workflow` key. Each key under the `workflow` key is the name of the workflow. Under that is a `tasks` key with each task definition. The following example defines a workflow. ```yaml workflow: sumtranslate: tasks: - action: summary - action: translation ``` ### schedule Schedules a workflow using a [cron expression](../../workflow/schedule). ```yaml workflow: index: schedule: cron: 0/10 * * * * * elements: ["api params"] tasks: - task: service url: api url - action: index ``` ### tasks ```yaml tasks: list ``` Expects a list of workflow tasks. Each element defines a single workflow task. All [task configuration](../../workflow/task) is supported. A shorthand syntax for creating tasks is supported. This syntax will automatically map task strings to an `action:value` pair. Example below. 
```yaml workflow: index: tasks: - action1 - action2 ``` Each task element supports the following additional arguments. #### action ```yaml action: string|list ``` Both single and multi-action tasks are supported. The action parameter works slightly differently when passed via configuration. The parameter(s) needs to be converted into callable method(s). If action is a pipeline that has been defined in the current configuration, it will use that pipeline as the action. There are three special action names: `index`, `upsert` and `search`. If `index` or `upsert` are used as the action, the task will collect workflow data elements and load them into the defined embeddings index. If `search` is used, the task will execute embeddings queries for each input data element. Otherwise, the action must be a path to a callable object or function. The configuration parser will resolve the function name and use that as the task action. #### task ```yaml task: string ``` Optionally sets the type of task to create. For example, this could be a `file` task or a `retrieve` task. If this is not specified, a generic task is created. [The list of workflow tasks can be found here](../../workflow). #### args ```yaml args: list ``` Optional list of static arguments to pass to the workflow task. These are combined with workflow data to pass to each `__call__`. ================================================ FILE: docs/api/customization.md ================================================ # Customization The txtai API has a number of features out of the box that are designed to help get started quickly. API services can also be augmented with custom code and functionality. The two main ways to do this are with extensions and dependencies. Extensions add a custom endpoint. Dependencies add middleware that executes with each request. See the sections below for more.
## Extensions While the API is extremely flexible and complex logic can be executed through YAML-driven workflows, some may prefer to create an endpoint in Python. API extensions define custom Python endpoints that interact with txtai applications. See the link below for a detailed example. | Notebook | Description | | |:----------|:-------------|------:| | [Custom API Endpoints](https://github.com/neuml/txtai/blob/master/examples/51_Custom_API_Endpoints.ipynb) | Extend the API with custom endpoints | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/51_Custom_API_Endpoints.ipynb) | ## Dependencies txtai has a default API token authorization method that works well in many cases. Dependencies can also add custom logic with each request. This could be an additional authorization step and/or an authentication method. See the link below for a detailed example. | Notebook | Description | | |:----------|:-------------|------:| | [API Authorization and Authentication](https://github.com/neuml/txtai/blob/master/examples/54_API_Authorization_and_Authentication.ipynb) | Add authorization, authentication and middleware dependencies to the API | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/54_API_Authorization_and_Authentication.ipynb) | ================================================ FILE: docs/api/index.md ================================================ # API ![api](../images/api.png#only-light) ![api](../images/api-dark.png#only-dark) txtai has a full-featured API, backed by [FastAPI](https://github.com/tiangolo/fastapi), that can optionally be enabled for any txtai process. All functionality found in txtai can be accessed via the API. The following is an example configuration and startup script for the API. Note: This configuration file enables all functionality. 
For memory-bound systems, splitting pipelines into multiple instances is a best practice. ```yaml # Index file path path: /tmp/index # Allow indexing of documents writable: True # Embeddings index embeddings: path: sentence-transformers/nli-mpnet-base-v2 # Extractive QA extractor: path: distilbert-base-cased-distilled-squad # Zero-shot labeling labels: # Similarity similarity: # Text segmentation segmentation: sentences: true # Text summarization summary: # Text extraction textractor: paragraphs: true minlength: 100 join: true # Transcribe audio to text transcription: # Translate text between languages translation: # Workflow definitions workflow: sumfrench: tasks: - action: textractor task: url - action: summary - action: translation args: ["fr"] sumspanish: tasks: - action: textractor task: url - action: summary - action: translation args: ["es"] ``` Assuming this YAML content is stored in a file named config.yml, the following command starts the API process. ```bash CONFIG=config.yml uvicorn "txtai.api:app" ``` Uvicorn is a full-featured production-ready server. See the [Uvicorn deployment guide](https://www.uvicorn.org/deployment/) for more on configuration options. ## Connect to API The default port for the API is 8000. See the uvicorn link above to change this. txtai has a number of language bindings which abstract the API (see links below). Alternatively, code can be written to connect directly to the API. Documentation for a live running instance can be found at the `/docs` url (i.e. http://localhost:8000/docs). The following example runs a workflow using cURL. ```bash curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"sumfrench", "elements": ["https://github.com/neuml/txtai"]}' ``` ## Local instance A local instance can be instantiated. In this case, a txtai application runs internally, without any network connections, providing the same consolidated functionality.
This enables running txtai in Python with configuration. The configuration above can be run in Python with: ```python from txtai import Application # Load and run workflow app = Application("config.yml") app.workflow("sumfrench", ["https://github.com/neuml/txtai"]) ``` See this [link for a full list of methods](./methods). ## Run with containers The API can be containerized and run. This will bring up an API instance without having to install Python, txtai or any dependencies on your machine! [See this section for more information](../cloud/#api). ## Supported language bindings The following programming languages have bindings with the txtai API: - [Python](https://github.com/neuml/txtai.py) - [JavaScript](https://github.com/neuml/txtai.js) - [Java](https://github.com/neuml/txtai.java) - [Rust](https://github.com/neuml/txtai.rs) - [Go](https://github.com/neuml/txtai.go) The API also supports hosting [OpenAI-compatible](./openai) and [Model Context Protocol (MCP)](./mcp) endpoints. See the links below for detailed examples covering the API.
| Notebook | Description | | |:----------|:-------------|------:| | [API Gallery](https://github.com/neuml/txtai/blob/master/examples/08_API_Gallery.ipynb) | Using txtai in JavaScript, Java, Rust and Go | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/08_API_Gallery.ipynb) | | [Distributed embeddings cluster](https://github.com/neuml/txtai/blob/master/examples/15_Distributed_embeddings_cluster.ipynb) | Distribute an embeddings index across multiple data nodes | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/15_Distributed_embeddings_cluster.ipynb) | | [Embeddings in the Cloud](https://github.com/neuml/txtai/blob/master/examples/43_Embeddings_in_the_Cloud.ipynb) | Load and use an embeddings index from the Hugging Face Hub | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/43_Embeddings_in_the_Cloud.ipynb) | | [Custom API Endpoints](https://github.com/neuml/txtai/blob/master/examples/51_Custom_API_Endpoints.ipynb) | Extend the API with custom endpoints | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/51_Custom_API_Endpoints.ipynb) | | [API Authorization and Authentication](https://github.com/neuml/txtai/blob/master/examples/54_API_Authorization_and_Authentication.ipynb) | Add authorization, authentication and middleware dependencies to the API | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/54_API_Authorization_and_Authentication.ipynb) | | [OpenAI Compatible API](https://github.com/neuml/txtai/blob/master/examples/74_OpenAI_Compatible_API.ipynb) | Connect 
to txtai with a standard OpenAI client library | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/74_OpenAI_Compatible_API.ipynb) | ================================================ FILE: docs/api/mcp.md ================================================ # Model Context Protocol The [Model Context Protocol (MCP)](https://modelcontextprotocol.io/introduction) is an open standard that enables developers to build secure, two-way connections between their data sources and AI-powered tools. The API can be configured to handle MCP requests. All enabled endpoints set in the API configuration are automatically added as MCP tools. ```yaml mcp: True ``` Once this configuration option is added, a new route is added to the application `/mcp`. The [Model Context Protocol Inspector tool](https://www.npmjs.com/package/@modelcontextprotocol/inspector) is a quick way to explore how the MCP tools are exported through this interface. Run the following and go to the local URL specified. ``` npx @modelcontextprotocol/inspector node build/index.js ``` Enter `http://localhost:8000/mcp` to see the full list of tools available. ================================================ FILE: docs/api/methods.md ================================================ # Methods ::: txtai.api.API options: inherited_members: true filters: - "!__del__" - "!flows" - "!function" - "!indexes" - "!limit" - "!pipes" - "!read" - "!resolve" - "!weights" ================================================ FILE: docs/api/openai.md ================================================ # OpenAI-compatible API The API can be configured to serve an OpenAI-compatible API as shown below. ```yaml openai: True ``` See the link below for a detailed example. 
| Notebook | Description | | |:----------|:-------------|------:| | [OpenAI Compatible API](https://github.com/neuml/txtai/blob/master/examples/74_OpenAI_Compatible_API.ipynb) | Connect to txtai with a standard OpenAI client library | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/74_OpenAI_Compatible_API.ipynb) | ================================================ FILE: docs/api/security.md ================================================ # Security The default implementation of an API service runs via HTTP and is fully open. If the service is being run as a prototype on an internal network, that may be fine. In most scenarios, the connection should at least be encrypted. Authorization is another built-in feature that requires a valid API token with each request. See below for more. ## HTTPS The default API service command starts a Uvicorn server as an HTTP service on port 8000. To run an HTTPS service, consider the following options. - [TLS Proxy Server](https://fastapi.tiangolo.com/deployment/https/). *Recommended choice*. With this configuration, the txtai API service runs as an HTTP service only accessible on the localhost/local network. The proxy server handles all encryption and redirects requests to local services. See this [example configuration](https://www.uvicorn.org/deployment/#running-behind-nginx) for more. - [Uvicorn SSL Certificate](https://www.uvicorn.org/deployment/). Another option is setting the SSL certificate on the Uvicorn service. This works in simple situations but gets complex when hosting multiple txtai or other related services. ## Authorization Authorization requires a valid API token with each API request. This token is sent as an HTTP `Authorization` header.
*Server* ```bash CONFIG=config.yml TOKEN=<sha256 encoded token> uvicorn "txtai.api:app" ``` *Client* ```bash curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -H "Authorization: Bearer <token>" \ -d '{"name":"sumfrench", "elements": ["https://github.com/neuml/txtai"]}' ``` It's important to note that HTTPS **must** be enabled using one of the methods mentioned above. Otherwise, tokens will be exchanged as clear text. Authentication and Authorization can be fully customized. See the [dependencies](../customization#dependencies) section for more. ================================================ FILE: docs/cloud.md ================================================ # Cloud ![cloud](images/cloud.png#only-light) ![cloud](images/cloud-dark.png#only-dark) Scalable cloud-native applications can be built with txtai. The following cloud runtimes are supported. - Container Orchestration Systems (e.g. Kubernetes) - Docker Engine - Serverless Compute - txtai.cloud (planned for future) Images for txtai are available on Docker Hub for [CPU](https://hub.docker.com/r/neuml/txtai-cpu) and [GPU](https://hub.docker.com/r/neuml/txtai-gpu) installs. The CPU install is recommended when GPUs aren't available given the image is significantly smaller. The base txtai images have no models installed and models will be downloaded each time the container starts. Caching the models is recommended as that will significantly reduce container start times. This can be done a couple of different ways. - Create a container with the [models cached](#container-image-model-caching) - Set the transformers cache environment variable and mount that volume when starting the image ```bash docker run -v <models directory>:/models -e TRANSFORMERS_CACHE=/models --rm -it <docker image> ``` ## Build txtai images The txtai images found on Docker Hub are configured to support most situations. This image can be locally built with different options as desired. Example build commands are shown below.
```bash # Get Dockerfile wget https://raw.githubusercontent.com/neuml/txtai/master/docker/base/Dockerfile # Build Ubuntu 22.04 image running Python 3.10 docker build -t txtai --build-arg BASE_IMAGE=ubuntu:22.04 --build-arg PYTHON_VERSION=3.10 . # Build image with GPU support docker build -t txtai --build-arg GPU=1 . # Build minimal image with the base txtai components docker build -t txtai --build-arg COMPONENTS= . ``` ## Container image model caching As mentioned previously, model caching is recommended to reduce container start times. The following commands demonstrate this. In all cases, it is assumed a config.yml file is present in the local directory with the desired configuration set. ### API This section builds an image that caches models and starts an API service. The config.yml file should be configured with the desired components to expose via the API. The following is a sample config.yml file that creates an Embeddings API service. ```yaml # config.yml writable: true embeddings: path: sentence-transformers/nli-mpnet-base-v2 content: true ``` The next section builds the image and starts an instance. ```bash # Get Dockerfile wget https://raw.githubusercontent.com/neuml/txtai/master/docker/api/Dockerfile # CPU build docker build -t txtai-api . # GPU build docker build -t txtai-api --build-arg BASE_IMAGE=neuml/txtai-gpu . # Run docker run -p 8000:8000 --rm -it txtai-api ``` ### Service This section builds a scheduled workflow service. [More on scheduled workflows can be found here.](../workflow/schedule) ```bash # Get Dockerfile wget https://raw.githubusercontent.com/neuml/txtai/master/docker/service/Dockerfile # CPU build docker build -t txtai-service . # GPU build docker build -t txtai-service --build-arg BASE_IMAGE=neuml/txtai-gpu . # Run docker run --rm -it txtai-service ``` ### Workflow This section builds a single run workflow. 
[Example workflows can be found here.](../examples/#workflows) ```bash # Get Dockerfile wget https://raw.githubusercontent.com/neuml/txtai/master/docker/workflow/Dockerfile # CPU build docker build -t txtai-workflow . # GPU build docker build -t txtai-workflow --build-arg BASE_IMAGE=neuml/txtai-gpu . # Run docker run --rm -it txtai-workflow ``` ## Serverless Compute One of the most powerful features of txtai is building YAML-configured applications with the "build once, run anywhere" approach. API instances and workflows can run locally, on a server, on a cluster or serverless. Serverless instances of txtai are supported on frameworks such as [AWS Lambda](https://aws.amazon.com/lambda/), [Google Cloud Functions](https://cloud.google.com/functions), [Azure Cloud Functions](https://azure.microsoft.com/en-us/services/functions/) and [Kubernetes](https://kubernetes.io/) with [Knative](https://knative.dev/docs/). ### AWS Lambda The following steps show a basic example of how to build a serverless API instance with [AWS SAM](https://github.com/aws/serverless-application-model). 
- Create config.yml and template.yml ```yaml # config.yml writable: true embeddings: path: sentence-transformers/nli-mpnet-base-v2 content: true ``` ```yaml # template.yml Resources: txtai: Type: AWS::Serverless::Function Properties: PackageType: Image MemorySize: 3000 Timeout: 20 Events: Api: Type: Api Properties: Path: "/{proxy+}" Method: ANY Metadata: Dockerfile: Dockerfile DockerContext: ./ DockerTag: api ``` - Install [AWS SAM](https://pypi.org/project/aws-sam-cli/) - Run the following ```bash # Get Dockerfile and application wget https://raw.githubusercontent.com/neuml/txtai/master/docker/aws/api.py wget https://raw.githubusercontent.com/neuml/txtai/master/docker/aws/Dockerfile # Build the docker image sam build # Start API gateway and Lambda instance locally sam local start-api -p 8000 --warm-containers LAZY # Verify instance running (should return 0) curl http://localhost:8000/count ``` If successful, a local API instance is now running in a "serverless" fashion. This configuration can be deployed to AWS using SAM. [See this link for more information.](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-cli-command-reference-sam-deploy.html) ### Kubernetes with Knative txtai scales with container orchestration systems. This can be self-hosted or with a cloud provider such as [Amazon Elastic Kubernetes Service](https://aws.amazon.com/eks/), [Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine) and [Azure Kubernetes Service](https://azure.microsoft.com/en-us/services/kubernetes-service/). There are also other smaller providers with a managed Kubernetes offering. A full example covering how to build a serverless txtai application on Kubernetes with Knative [can be found here](https://medium.com/neuml/serverless-vector-search-with-txtai-96f6163ab972). ## txtai.cloud [txtai.cloud](https://txtai.cloud) is a planned effort that will offer an easy and secure way to run hosted txtai applications.
================================================ FILE: docs/embeddings/configuration/ann.md ================================================ # ANN Approximate Nearest Neighbor (ANN) index configuration for storing vector embeddings. ## backend ```yaml backend: faiss|hnsw|annoy|ggml|numpy|torch|pgvector|sqlite|custom ``` Sets the ANN backend. Defaults to `faiss`. Additional backends are available via the [ann](../../../install/#ann) extras package. Set custom backends via setting this parameter to the fully resolvable class string. Backend-specific settings are set with a corresponding configuration object having the same name as the backend (i.e. annoy, faiss, or hnsw). These are optional and set to defaults if omitted. ### faiss ```yaml faiss: components: comma separated list of components - defaults to "IDMap,Flat" for small indices and "IVFx,Flat" for larger indexes where x = min(4 * sqrt(embeddings count), embeddings count / 39) automatically calculates number of IVF cells when omitted (supports "IVF,Flat") nprobe: search probe setting (int) - defaults to x/16 (as defined above) for larger indexes nflip: same as nprobe - only used with binary hash indexes quantize: store vectors with x-bit precision vs 32-bit (boolean|int) true sets 8-bit precision, false disables, int sets specified precision mmap: load as on-disk index (boolean) - trade query response time for a smaller RAM footprint, defaults to false sample: percent of data to use for model training (0.0 - 1.0) reduces indexing time for larger (>1M+ row) indexes, defaults to 1.0 ``` Faiss supports both floating point and binary indexes. Floating point indexes are the default. Binary indexes are used when indexing scalar-quantized datasets. See the following Faiss documentation links for more information. 
- [Guidelines for choosing an index](https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index) - [Index configuration summary](https://github.com/facebookresearch/faiss/wiki/Faiss-indexes) - [Index Factory](https://github.com/facebookresearch/faiss/wiki/The-index-factory) - [Binary Indexes](https://github.com/facebookresearch/faiss/wiki/Binary-indexes) - [Search Tuning](https://github.com/facebookresearch/faiss/wiki/Faster-search) Note: For macOS users, an existing bug in an upstream package restricts the number of processing threads to 1. This limitation is managed internally to prevent system crashes. ### hnsw ```yaml hnsw: efconstruction: ef_construction param for init_index (int) - defaults to 200 m: M param for init_index (int) - defaults to 16 randomseed: random-seed param for init_index (int) - defaults to 100 efsearch: ef search param (int) - defaults to None and not set ``` See [Hnswlib documentation](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md) for more information on these parameters. ### annoy ```yaml annoy: ntrees: number of trees (int) - defaults to 10 searchk: search_k search setting (int) - defaults to -1 ``` See [Annoy documentation](https://github.com/spotify/annoy#full-python-api) for more information on these parameters. Note that annoy indexes can not be modified after creation, upserts/deletes and other modifications are not supported. ### ggml ```yaml ggml: gpu: enable GPU - defaults to True quantize: sets the tensor quantization - defaults to F32 querysize: query buffer size - defaults to 64 ``` The [GGML](https://github.com/ggml-org/ggml) backend is a k-nearest neighbors backend. It stores tensors using GGML and [GGUF](https://huggingface.co/docs/hub/en/gguf). It supports GPU-enabled operations and supports quantization. GGML is the framework used by [llama.cpp](https://github.com/ggml-org/llama.cpp). 
[See this](https://github.com/ggml-org/ggml/blob/master/include/ggml.h#L379) for a list of quantization types. ### numpy The NumPy backend is a k-nearest neighbors backend. It's designed for simplicity and works well with smaller datasets that fit into memory. ```yaml numpy: safetensors: stores vectors using the safetensors format defaults to NumPy array storage ``` ### torch The Torch backend is a k-nearest neighbors backend like NumPy. It supports GPU-enabled operations. It also has support for quantization which enables fitting larger arrays into GPU memory. When quantization is enabled, vectors are _always_ stored in safetensors. _Note that macOS support for quantization is limited._ ```yaml torch: safetensors: stores vectors using the safetensors format - defaults to NumPy array storage if quantization is disabled quantize: type: quantization type (fp4, nf4, int8) blocksize: quantization block size parameter ``` ### pgvector ```yaml pgvector: url: database url connection string, alternatively can be set via ANN_URL environment variable schema: database schema to store vectors - defaults to being determined by the database table: database table to store vectors - defaults to `vectors` precision: vector float precision (half or full) - defaults to `full` efconstruction: ef_construction param (int) - defaults to 200 m: M param for init_index (int) - defaults to 16 ``` The pgvector backend stores embeddings in a Postgres database. See the [pgvector documentation](https://github.com/pgvector/pgvector-python?tab=readme-ov-file#sqlalchemy) for more information on these parameters. See the [SQLAlchemy](https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls) documentation for more information on how to construct url connection strings. 
### sqlite ```yaml sqlite: quantize: store vectors with x-bit precision vs 32-bit (boolean|int) true sets 8-bit precision, false disables, int sets specified precision table: database table to store vectors - defaults to `vectors` ``` The SQLite backend stores embeddings in a SQLite database using [sqlite-vec](https://github.com/asg017/sqlite-vec). This backend supports 1-bit and 8-bit quantization at the storage level. See [this note](https://alexgarcia.xyz/sqlite-vec/python.html#macos-blocks-sqlite-extensions-by-default) on how to run this ANN on MacOS. ================================================ FILE: docs/embeddings/configuration/cloud.md ================================================ # Cloud The following describes parameters used to sync indexes with cloud storage. Cloud object storage, the [Hugging Face Hub](https://huggingface.co/models) and custom providers are all supported. Parameters are set via the [embeddings.load](../../methods/#txtai.embeddings.base.Embeddings.load) and [embeddings.save](../../methods/#txtai.embeddings.base.Embeddings.save) methods. ## provider ```yaml provider: string ``` Cloud provider. Can be one of the following: - Cloud object storage. Set to one of these [providers](https://libcloud.readthedocs.io/en/stable/storage/supported_providers.html). Use the text shown in the `Provider Constant` column as lower case. - Hugging Face Hub. Set to `huggingface-hub`. - Custom providers. Set to the full class path of the custom provider. ## container ```yaml container: string ``` Container/bucket/directory/repository name. Embeddings will be stored in the container with the filename specified by the `path` configuration. ## Cloud object storage configuration In addition to the above common configuration, the cloud object storage provider has the following additional configuration parameters. Note that some cloud providers do not need any of these parameters and can use implicit authentication with service accounts. 
See the [libcloud documentation](https://libcloud.readthedocs.io/en/stable/apidocs/libcloud.common.html#module-libcloud.common.base) for more information on these parameters. ### key ```yaml key: string ``` Provider-specific access key. Can also be set via `ACCESS_KEY` environment variable. Ensure the configuration file is secured if added to the file. When using implicit authentication, set this to a value such as 'using-implicit-auth'. ### secret ```yaml secret: string ``` Provider-specific access secret. Can also be set via `ACCESS_SECRET` environment variable. Ensure the configuration file is secured if added to the file. When using implicit authentication, this option is not required. ### prefix ```yaml prefix: string ``` Optional object prefix. Object storage doesn't have the concept of a directory but a prefix is similar. For example, a prefix could be `base/dir`. This helps with organizing data in an object storage bucket. More can be found at the following links. - [Organizing objects using prefixes](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-prefixes.html) - [libcloud container method documentation](https://libcloud.readthedocs.io/en/stable/storage/api.html#libcloud.storage.base.StorageDriver.iterate_container_objects) ### host ```yaml host: string ``` Optional server host name. Set when using a local cloud storage server. ### port ```yaml port: int ``` Optional server port. Set when using a local cloud storage server. ### token ```yaml token: string ``` Optional temporary session token ### region ```yaml region: string ``` Optional parameter to specify the storage region, provider-specific. ## Hugging Face Hub configuration The huggingface-hub provider supports the following additional configuration parameters. More on these parameters can be found in the [Hugging Face Hub's documentation](https://huggingface.co/docs/huggingface_hub/main/en/package_reference/overview). 
### revision ```yaml revision: string ``` Optional Git revision id which can be a branch name, a tag, or a commit hash ### cache ```yaml cache: string ``` Path to the folder where cached files are stored ### token ```yaml token: string|boolean ``` Token to be used for the download. If set to True, the token will be read from the Hugging Face config folder. ================================================ FILE: docs/embeddings/configuration/database.md ================================================ # Database Databases store metadata, text and binary content. ## content ```yaml content: boolean|sqlite|duckdb|client|url|custom ``` Enables content storage. When true, the default storage engine, `sqlite` will be used to save metadata. Client-server connections are supported with either `client` or a full connection URL. When set to `client`, the CLIENT_URL environment variable must be set to the full connection URL. See the [SQLAlchemy](https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls) documentation for more information on how to construct connection strings for client-server databases. Add custom storage engines via setting this parameter to the fully resolvable class string. Content storage specific settings are set with a corresponding configuration object having the same name as the content storage engine (i.e. duckdb or sqlite). These are optional and set to defaults if omitted. ### client ```yaml schema: default database schema for the session - defaults to being determined by the database ``` Additional settings for client-server databases. Also supported when the `content=url`. ### sqlite ```yaml sqlite: wal: enable write-ahead logging - allows concurrent read/write operations, defaults to false ``` Additional settings for SQLite. ## objects ```yaml objects: boolean|image|pickle ``` Enables object storage. Supports storing binary content. Requires content storage to also be enabled. 
Object encoding options are: - `standard`: Default encoder when boolean set. Encodes and decodes objects as byte arrays. - `image`: Image encoder. Encodes and decodes objects as image objects. - `pickle`: Pickle encoder. Encodes and decodes objects with the pickle module. Supports arbitrary objects. ## functions ```yaml functions: list ``` List of functions with user-defined SQL functions. Each list element must be one of the following: - function - callable object - dict with fields for name, argcount, function and deterministic [An example can be found here](../../query#custom-sql-functions). ## expressions ```yaml expressions: list ``` List of expression shortcuts. Each list element must be a dict with the following fields. - `name`: name of the expression - `expression`: SQL expression, defaults to `name` when empty - `index`: if this expression should have a database index, defaults to False when not provided The expression can be a json data column, sql function or anything that can be run as a SQL snippet. ## query ```yaml query: path: sets the path for the query model - this can be any model on the Hugging Face Model Hub or a local file path. prefix: text prefix to prepend to all inputs maxlength: maximum generated sequence length ``` Query translation model. Translates natural language queries to txtai compatible SQL statements. ================================================ FILE: docs/embeddings/configuration/general.md ================================================ # General General configuration options. ## keyword ```yaml keyword: boolean|string ``` Enables sparse keyword indexing for this embeddings. When set to a boolean, this parameter creates a BM25 index for full text search. When set to a string, it expects a [keyword method](../scoring#method). It also implicitly disables the [defaults](#defaults) setting for vector search. ## sparse ```yaml sparse: boolean|path ``` Enables sparse vector indexing for this embeddings. 
When set to `True`, this parameter creates a sparse vector index using the [default sparse index model](https://huggingface.co/prithivida/Splade_PP_en_v2). When set to a string, it expects a local or Hugging Face model path. It also implicitly disables the [defaults](#defaults) setting for vector search. ## dense ```yaml dense: boolean|string ``` Alias for the [vector model path](../vectors/#path). When set to `True`, the [default transformers vector model](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) is used. ## hybrid ```yaml hybrid: boolean ``` Enables hybrid (sparse + dense) indexing for this embeddings. When enabled, this parameter creates a BM25 index for full text search. It has no effect on the [defaults](#defaults) or [path](../vectors/#path) settings. ## defaults ```yaml defaults: boolean ``` Uses default vector model path when enabled (default setting is True) and `path` is not provided. See [this link](../) for an example. ## indexes ```yaml indexes: dict ``` Key value pairs defining subindexes for this embeddings. Each key is the index name and the value is the full configuration. This configuration can use any of the available configurations in a standard embeddings instance. ## autoid ```yaml autoid: int|uuid function ``` Sets the auto id generation method. When this is not set, an autogenerated numeric sequence is used. This also supports [UUID generation functions](https://docs.python.org/3/library/uuid.html#uuid.uuid1). For example, setting this value to `uuid4` will generate random UUIDs. Setting this to `uuid5` will generate deterministic UUIDs for each input data row. ## columns ```yaml columns: text: name of the text column object: name of the object column store: limit json data fields to this list of columns ``` Sets the `text` and `object` column names. Defaults to `text` and `object` if not provided. `store` sets a list of columns to store in the JSON data field. When this isn't provided, all columns are stored (default).
When `store` is set to `None`, no JSON columns are stored. This is useful if a field is only needed at indexing time but not search time. ## format ```yaml format: json|pickle ``` Sets the configuration storage format. Defaults to `json`. ================================================ FILE: docs/embeddings/configuration/graph.md ================================================ # Graph Enable graph storage via the `graph` parameter. This component requires the [graph](../../../install/#graph) extras package. When enabled, a graph network is built using the embeddings index. Graph nodes are synced with each embeddings index operation (index/upsert/delete). Graph edges are created using the embeddings index upon completion of each index/upsert/delete embeddings index call. ## backend ```yaml backend: networkx|rdbms|custom ``` Sets the graph backend. Defaults to `networkx`. Add custom graph storage engines via setting this parameter to the fully resolvable class string. The `rdbms` backend has the following additional settings. ### rdbms ```yaml url: database url connection string, alternatively can be set via the GRAPH_URL environment variable schema: database schema to store graph - defaults to being determined by the database nodes: table to store node data, defaults to `nodes` edges: table to store edge data, defaults to `edges` ``` ## batchsize ```yaml batchsize: int ``` Batch query size, used to query embeddings index - defaults to 256. ## limit ```yaml limit: int ``` Maximum number of results to return per embeddings query - defaults to 15. ## minscore ```yaml minscore: float ``` Minimum score required to consider embeddings query matches - defaults to 0.1. ## approximate ```yaml approximate: boolean ``` When true, queries only run for nodes without edges - defaults to true.
## topics ```yaml topics: algorithm: community detection algorithm (string), options are louvain (default), greedy, lpa level: controls number of topics (string), options are best (default) or first resolution: controls number of topics (int), larger values create more topics, defaults to 100 labels: scoring index method used to build topic labels (string) options are bm25 (default), tfidf, sif terms: number of frequent terms to use for topic labels (int), defaults to 4 stopwords: optional list of stop words to exclude from topic labels categories: optional list of categories used to group topics, allows granular topics with broad categories grouping topics ``` Enables topic modeling. Defaults are tuned so that in most cases these values don't need to be changed (except for categories). These parameters are available for advanced use cases where one wants full control over the community detection process. ## copyattributes ```yaml copyattributes: boolean|list ``` Copy these attributes from input dictionaries in the `insert` method. If this is set to `True`, all attributes are copied. Otherwise, only the attributes specified in this list are copied to the graph as attributes. ================================================ FILE: docs/embeddings/configuration/index.md ================================================ # Configuration The following describes available embeddings configuration. These parameters are set in the [Embeddings constructor](../methods#txtai.embeddings.base.Embeddings.__init__) via either the `config` parameter or as keyword arguments. Configuration is designed to be optional and set only when needed. Out of the box, sensible defaults are picked to get up and running fast.
For example: ```python from txtai import Embeddings embeddings = Embeddings() ``` Creates a new embeddings instance, using [all-MiniLM-L6-v2](https://hf.co/sentence-transformers/all-MiniLM-L6-v2) as the vector model, [Faiss](https://faiss.ai/) as the ANN index backend and content disabled. ```python from txtai import Embeddings embeddings = Embeddings(content=True) ``` Is the same as above except it adds in [SQLite](https://www.sqlite.org/index.html) for content storage. The following sections link to all the available configuration options. ## [ANN](./ann) The default vector index backend is Faiss. ## [Cloud](./cloud) Embeddings databases can optionally be synced with cloud storage. ## [Database](./database) Content storage is disabled by default. When enabled, SQLite is the default storage engine. ## [General](./general) General configuration that doesn't fit elsewhere. ## [Graph](./graph) An accompanying graph index can be created with an embeddings database. This enables topic modeling, path traversal and more. [NetworkX](https://github.com/networkx/networkx) is the default graph index. ## [Scoring](./scoring) Sparse keyword indexing and word vectors term weighting. ## [Vectors](./vectors) Vector search is enabled by converting text and other binary data into embeddings vectors. These vectors are then stored in an ANN index. The vector model is optional and a default model is used when not provided. ================================================ FILE: docs/embeddings/configuration/scoring.md ================================================ # Scoring Enable scoring support via the `scoring` parameter. This scoring instance can serve two purposes, depending on the settings. One use case is building sparse/keyword indexes. This occurs when the `terms` parameter is set to `True`. The other use case is with word vector term weighting. This feature has been available since the initial version but isn't quite as common anymore.
The following covers the available options. ## method ```yaml method: bm25|tfidf|sif|pgtext|sparse|custom ``` Sets the scoring method. Add custom scoring via setting this parameter to the fully resolvable class string. ### pgtext ```yaml schema: database schema to store keyword index - defaults to being determined by the database ``` Additional settings for Postgres full-text keyword indexes. ### sparse ```yaml path: sparse vector model path vectormethod: vector embeddings method vectornormalize: enable vector embeddings normalization (boolean) gpu: boolean|int|string|device normalize: enable score normalization (boolean|float|string|dict) batch: Sets the transform batch size encodebatch: Sets the encode batch size vectors: additional model init args encodeargs: additional encode() args backend: ivfsparse|pgsparse ``` Sparse vector scoring options. The sparse scoring instance combines a sparse vector model with a sparse approximate nearest neighbor index (ANN). This method supports both vector normalization and score normalization. Vector normalization normalizes all vectors to have a magnitude of 1. By extension, all generated scores will be 0 to 1. Score normalization scales the output between 0 and 1. This setting supports: - `True` for default scale normalization - `float` normalize using this as the scale factor - `"bayes"` for Bayesian normalization using dynamic candidate score statistics - `{method: "bayes", alpha: 1.0, beta: null}` for Bayesian normalization with optional custom parameters #### ivfsparse ```yaml ivfsparse: sample: percent of data to use for model training (0.0 - 1.0) nfeatures: top n features to use for model training (int) nlist: desired number of clusters (int) nprobe: search probe setting (int) minpoints: minimum number of points for a cluster (int) ``` Inverted file (IVF) index with flat vector file storage and sparse array support. #### pgsparse Sparse ANN backed by Postgres. 
Supports same options as the [pgvector](../ann/#pgvector) ANN. ## terms ```yaml terms: boolean|dict ``` Enables term frequency sparse arrays for a scoring instance. This is the backend for sparse keyword indexes. Supports a `dict` with the parameters `cachelimit` and `cutoff`. `cachelimit` is the maximum amount of resident memory in bytes to use during indexing before flushing to disk. This parameter is an `int`. `cutoff` is used during search to determine what constitutes a common term. This parameter is a `float`, i.e. 0.1 for a cutoff of 10%. When `terms` is set to `True`, default parameters are used for the `cachelimit` and `cutoff`. Normally, these defaults are sufficient. ## normalize ```yaml normalize: boolean|str|dict ``` Enables normalized scoring (ranging from 0 to 1). This setting supports: - `True` for standard score normalization - `"bayes"` | `"bb25"` for Bayesian normalization using dynamic candidate score statistics - `{method: "bayes", alpha: 1.0, beta: null}` for Bayesian normalization with optional custom parameters When standard normalization is enabled, statistics from the index are used to calculate normalized scores. When Bayesian/BB25 normalization is enabled, it uses positive-score candidates, dynamic `beta=median(scores)`, adaptive `alpha_eff=alpha/std(scores)` and a sigmoid transform (likelihood-only variant with flat prior) to map scores to `[0, 1]`. Bayesian normalization references: - [https://github.com/instructkr/bb25](https://github.com/instructkr/bb25) - [https://github.com/cognica-io/bayesian-bm25](https://github.com/cognica-io/bayesian-bm25) ## tokenizer ```yaml tokenizer: dict ``` Set tokenization rules. Passes these arguments to the underlying [Tokenization pipeline](../../../pipeline/data/tokenizer#txtai.pipeline.Tokenizer.__init__). 
================================================ FILE: docs/embeddings/configuration/vectors.md ================================================ # Vectors The following covers available vector model configuration options. ## path ```yaml path: string ``` Sets the path for a vectors model. When using a transformers/sentence-transformers model, this can be any model on the [Hugging Face Hub](https://huggingface.co/models) or a local file path. Otherwise, it must be a local file path to a word embeddings model. ## method ```yaml method: transformers|sentence-transformers|llama.cpp|litellm|model2vec|external|words ``` Embeddings method to use. If the method is not provided, it is inferred using the `path`. `sentence-transformers`, `llama.cpp`, `litellm`, `model2vec` and `words` require the [vectors](../../../install/#vectors) extras package to be installed. ### transformers Builds embeddings using a transformers model. While this can be any transformers model, it works best with [models trained](https://huggingface.co/models?pipeline_tag=sentence-similarity) to build embeddings. `mean`, `cls` and `late` pooling are supported and automatically inferred from the model. The pooling method can be overwritten by changing the method from `transformers` to `meanpooling`, `clspooling` or `latepooling` respectively. Setting `maxlength` to `True` enables truncating inputs to the `max_seq_length`. Setting `maxlength` to an integer will truncate inputs to that value. When omitted (default), the `maxlength` will be set to either the model or tokenizer maxlength. ### sentence-transformers Same as transformers but loads models with the [sentence-transformers](https://github.com/UKPLab/sentence-transformers) library. ### llama.cpp Builds embeddings using a [llama.cpp](https://github.com/abetlen/llama-cpp-python) model. Supports both local and remote GGUF paths on the HF Hub. ### litellm Builds embeddings using a LiteLLM model. 
See the [LiteLLM documentation](https://litellm.vercel.app/docs/providers) for the options available with LiteLLM models. ### model2vec Builds embeddings using a [Model2Vec](https://github.com/MinishLab/model2vec) model. Model2Vec is a knowledge-distilled version of a transformers model with static vectors. ### words Builds embeddings using a word embeddings model and static vectors. While Transformers models are preferred in most cases, this method can be useful for low resource and historical languages where there isn't much linguistic data available. #### pca ```yaml pca: int ``` Removes _n_ principal components from generated embeddings. When enabled, a TruncatedSVD model is built to help with dimensionality reduction. After pooling of vectors creates a single embedding, this method is applied. ### external Embeddings are created via an external model or API. Requires setting the [transform](#transform) parameter to a function that translates data into embeddings. #### transform ```yaml transform: function ``` When method is `external`, this function transforms input content into embeddings. The input to this function is a list of data. This method must return either a numpy array or list of numpy arrays. ## gpu ```yaml gpu: boolean|int|string|device ``` Set the target device. Supports true/false, device id, device string and torch device instance. This is automatically derived if omitted. The `sentence-transformers` method supports encoding with multiple GPUs. This can be enabled by setting the gpu parameter to `all`. ## batch ```yaml batch: int ``` Sets the transform batch size. This parameter controls how input streams are chunked and vectorized. ## encodebatch ```yaml encodebatch: int ``` Sets the encode batch size. This parameter controls the underlying vector model batch size. This often corresponds to a GPU batch size, which controls GPU memory usage. ## dimensionality ```yaml dimensionality: int ``` Enables truncation of vectors to this dimensionality. 
This is only useful for models trained to store more important information in earlier dimensions such as [Matryoshka Representation Learning (MRL)](https://huggingface.co/blog/matryoshka). ## quantize ```yaml quantize: int|boolean ``` Enables scalar vector quantization at the specified precision. Supports 1-bit through 8-bit quantization. Scalar quantization transforms continuous floating point values to discrete unsigned integers. The `faiss`, `pgvector`, `numpy` and `torch` ANN backends support storing these vectors. This parameter supports booleans for backwards compatibility. When set to true/false, this flag sets [faiss.quantize](../ann/#faiss). In addition to vector-level quantization, some ANN backends have the ability to quantize vectors at the storage layer. See the [ANN](../ann) configuration options for more. ## instructions ```yaml instructions: query: prefix for queries data: prefix for indexing ``` Instruction-based models use prefixes to modify how embeddings are computed. This is especially useful with asymmetric search, which is when the query and indexed data are of vastly different lengths. In other words, short queries with long documents. `txtai` automatically loads prompts stored in `config_sentence_transformers.json` except if this parameter is set. For some older models such as [E5-base](https://huggingface.co/intfloat/e5-base), instructions still need to be provided via this parameter. ## models ```yaml models: dict ``` Loads and stores vector models in this cache. This is primarily used with subindexes but can be set on any embeddings instance. This prevents the same model from being loaded multiple times when working with multiple embeddings instances. ## tokenize ```yaml tokenize: boolean ``` Enables string tokenization (defaults to false). This method applies tokenization rules that only work with English language text. It's not recommended for use with recent vector models.
## vectors ```yaml vectors: dict ``` Passes these additional parameters to the underlying vector model. ### muvera ```yaml muvera: repetitions: defaults to 20 hashes: defaults to 5 projection: defaults to 16 ``` Settings to control the size of MUVERA fixed dimensional outputs. Default is 20 * 2^5 * 16 = 10,240 dimensions. ### trust_remote_code ```yaml trust_remote_code: boolean ``` Parameter for trusting the code from Hugging Face models with custom implementations. ================================================ FILE: docs/embeddings/format.md ================================================ # Index format ![format](../images/format.png#only-light) ![format](../images/format-dark.png#only-dark) This section documents the txtai index format. Each component is designed to ensure open access to the underlying data in a programmatic and platform independent way. If an underlying library has an index format, that is used. Otherwise, txtai persists content with [MessagePack](https://msgpack.org/index.html) serialization. To learn more about how these components work together, read the [Index Guide](../indexing) and [Query Guide](../query). ## ANN Approximate Nearest Neighbor (ANN) index configuration for storing vector embeddings. | Component | Storage Format | | ------------------------------------------------------------- | ---------------------------------------------------------------------------- | | [Faiss](https://github.com/facebookresearch/faiss) | Local file format provided by library | | [Hnswlib](https://github.com/nmslib/hnswlib) | Local file format provided by library | | [Annoy](https://github.com/spotify/annoy) | Local file format provided by library | | [NumPy](https://github.com/numpy/numpy) | Local NumPy array files via np.save / np.load | | [Postgres via pgvector](https://github.com/pgvector/pgvector) | Vector tables in a Postgres database | ## Core Core embeddings index files.
| Component | Storage Format | | ------------------------------------------------------------- | ---------------------------------------------------------------------------- | | [Configuration](https://www.json.org/) | Embeddings index configuration stored as JSON | | [Index Ids](https://msgpack.org/index.html) | Embeddings index ids serialized with MessagePack. Only enabled when content storage (database) is disabled. | ## Database Databases store metadata, text and binary content. | Component | Storage Format | | ------------------------------------------------------------- | ---------------------------------------------------------------------------- | | [SQLite](https://www.sqlite.org/) | Local database files with SQLite | | [DuckDB](https://github.com/duckdb/duckdb) | Local database files with DuckDB | | [Postgres](https://www.postgresql.org/) | Postgres relational database via [SQLAlchemy](https://github.com/sqlalchemy/sqlalchemy). Supports additional databases via this library. | ## Graph Graph nodes and edges for an embeddings index | Component | Storage Format | | ------------------------------------------------------------- | ----------------------------------------------------------------------------- | | [NetworkX](https://github.com/networkx/networkx) | Nodes and edges exported to local file serialized with MessagePack | | [Postgres](https://github.com/aplbrain/grand) | Nodes and edges stored in a Postgres database. Supports additional databases. | ## Scoring Sparse/keyword indexing | Component | Storage Format | | ------------------------------------------------------------- | ----------------------------------------------------------------------------- | | [Local index](https://www.sqlite.org/) | Metadata serialized with MessagePack. Terms stored in SQLite.
| | [Postgres](https://www.postgresql.org/docs/current/textsearch.html) | Text indexed with Postgres Full Text Search (FTS) | ================================================ FILE: docs/embeddings/index.md ================================================ # Embeddings ![embeddings](../images/embeddings.png#only-light) ![embeddings](../images/embeddings-dark.png#only-dark) Embeddings databases are the engine that delivers semantic search. Data is transformed into embeddings vectors where similar concepts will produce similar vectors. Indexes both large and small are built with these vectors. The indexes are used to find results that have the same meaning, not necessarily the same keywords. The following code snippet shows how to build and search an embeddings index. ```python from txtai import Embeddings # Create embeddings model, backed by sentence-transformers & transformers embeddings = Embeddings(path="sentence-transformers/nli-mpnet-base-v2") data = [ "US tops 5 million confirmed virus cases", "Canada's last fully intact ice shelf has suddenly collapsed, " + "forming a Manhattan-sized iceberg", "Beijing mobilises invasion craft along coast as Taiwan tensions escalate", "The National Park Service warns against sacrificing slower friends " + "in a bear attack", "Maine man wins $1M from $25 lottery ticket", "Make huge profits without work, earn up to $100,000 a day" ] # Index the list of text embeddings.index(data) print(f"{'Query':20} Best Match") print("-" * 50) # Run an embeddings search for each query for query in ("feel good story", "climate change", "public health story", "war", "wildlife", "asia", "lucky", "dishonest junk"): # Extract uid of first result # search result format: (uid, score) uid = embeddings.search(query, 1)[0][0] # Print text print(f"{query:20} {data[uid]}") ``` ## Build An embeddings instance is [configuration-driven](configuration) based on what is passed in the constructor. 
Vectors are stored with the option to also [store content](configuration/database#content). Content storage enables additional filtering and data retrieval options. The example above sets a specific embeddings vector model via the [path](configuration/vectors/#path) parameter. An embeddings instance with no configuration can also be created. ```python embeddings = Embeddings() ``` In this case, when loading and searching for data, the [default transformers vector model](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) is used to vectorize data. See the [model guide](../models) for current model recommendations. ## Index After creating a new embeddings instance, the next step is adding data to it. ```python embeddings.index(rows) ``` The index method takes an iterable and supports the following formats for each element. - `(id, data, tags)` - default processing format | Element | Description | | ----------- | ------------------------------------------------------------- | | id | unique record id | | data | input data to index, can be text, a dictionary or object | | tags | optional tags string, used to mark/label data as it's indexed | - `(id, data)` Same as above but without tags. - `data` Single element to index. In this case, unique id's will automatically be generated. Note that for generated id's, [upsert](methods/#txtai.embeddings.base.Embeddings.upsert) and [delete](methods/#txtai.embeddings.base.Embeddings.delete) calls require a separate search to get the target ids. When the data field is a dictionary, text is passed via the `text` key, binary objects via the `object` key. Note that [content](configuration/database#content) must be enabled to store metadata and [objects](configuration/database#objects) to store binary object data. The `id` and `tags` keys will be extracted, if provided. The input iterable can be a list or generator.
[Generators](https://wiki.python.org/moin/Generators) help with indexing very large datasets as only portions of the data are in memory at any given time. More information on indexing can be found in the [index guide](indexing). ## Search Once data is indexed, it is ready for search. ```python embeddings.search(query, limit) ``` The search method takes two parameters, the query and query limit. The results format is different based on whether [content](configuration/database#content) is stored or not. - List of `(id, score)` when content is _not_ stored - List of `{**query columns}` when content is stored Both natural language and SQL queries are supported. More information can be found in the [query guide](query). ## Resource management Embeddings databases are context managers. The following blocks automatically [close](methods/#txtai.embeddings.base.Embeddings.close) and free resources upon completion. ```python # Create a new Embeddings database, index data and save with Embeddings() as embeddings: embeddings.index(rows) embeddings.save(path) # Search a saved Embeddings database with Embeddings().load(path) as embeddings: embeddings.search(query) ``` While calling `close` isn't always necessary (resources will be garbage collected), it's best to free shared resources like database connections as soon as they aren't needed. ## More examples See [this link](../examples/#semantic-search) for a full list of embeddings examples. ================================================ FILE: docs/embeddings/indexing.md ================================================ # Index guide ![indexing](../images/indexing.png#only-light) ![indexing](../images/indexing-dark.png#only-dark) This section gives an in-depth overview on how to index data with txtai. We'll cover vectorization, indexing/updating/deleting data and the various components of an embeddings database. ## Vectorization The most compute intensive step in building an index is vectorization.
The [path](../configuration/vectors#path) parameter sets the path to the vector model. There is logic to automatically detect the vector model [method](../configuration/vectors#method) but it can also be set directly. The [batch](../configuration/vectors#batch) and [encodebatch](../configuration/vectors#encodebatch) parameters control the vectorization process. Larger values for `batch` will pass larger batches to the vectorization method. Larger values for `encodebatch` will pass larger batches for each vector encode call. In the case of GPU vector models, larger values will consume more GPU memory. Data is buffered to temporary storage during indexing as embeddings vectors can be quite large (for example 768 dimensions of float32 is 768 * 4 = 3072 bytes per vector). Once vectorization is complete, a mmapped array is created with all vectors for [Approximate Nearest Neighbor (ANN)](../configuration/vectors#backend) indexing. The terms `ANN` and `dense vector index` are used interchangeably throughout txtai's documentation. ## Setting a backend As mentioned above, computed vectors are stored in an ANN. There are various index [backends](../configuration/ann#backend) that can be configured. Faiss is the default backend. ## Content storage Embeddings indexes can optionally [store content](../configuration/database#content). When this is enabled, the input content is saved in a database alongside the computed vectors. This enables filtering on additional fields and content retrieval. The columns used for text, object and JSON data storage are set via [column configuration](../configuration/general#columns). ## Index vs Upsert Data is loaded into an index with either an [index](../methods#txtai.embeddings.base.Embeddings.index) or [upsert](../methods#txtai.embeddings.base.Embeddings.upsert) call. 
```python embeddings.index([(uid, text, None) for uid, text in enumerate(data)]) embeddings.upsert([(uid, text, None) for uid, text in enumerate(data)]) ``` The `index` call will build a brand new index replacing an existing one. `upsert` will insert or update records. `upsert` ops do _not_ require a full index rebuild. ## Save Indexes can be stored in a directory using the [save](../methods/#txtai.embeddings.base.Embeddings.save) method. ```python embeddings.save("/path/to/save") ``` Compressed indexes are also supported. ```python embeddings.save("/path/to/save/index.tar.gz") ``` In addition to saving indexes locally, they can also be persisted to [cloud storage](../configuration/cloud). ```python embeddings.save("/path/to/save/index.tar.gz", cloud={...}) ``` This is especially useful when running in a serverless context or otherwise running on temporary compute. Cloud storage is only supported with compressed indexes. Embeddings indexes can be restored using the [load](../methods/#txtai.embeddings.base.Embeddings.load) method. ```python embeddings.load("/path/to/load") ``` ## Delete Content can be removed from the index with the [delete](../methods#txtai.embeddings.base.Embeddings.delete) method. This method takes a list of ids to delete. ```python embeddings.delete(ids) ``` ## Reindex When [content storage](../configuration/database#content) is enabled, [reindex](../methods#txtai.embeddings.base.Embeddings.reindex) can be called to rebuild the index with new settings. For example, the backend can be switched from faiss to hnsw or the vector model can be updated. This prevents having to go back to the original raw data. ```python embeddings.reindex(path="sentence-transformers/all-MiniLM-L6-v2", backend="hnsw") ``` ## Graph Enabling a [graph network](../configuration/graph) adds a semantic graph at index time as data is being vectorized. Vector embeddings are used to automatically create relationships in the graph. 
Relationships can also be manually specified at index time. ```python # Manual relationships by id embeddings.index([{"id": "0", "text": "...", "relationships": ["2"]}]) # Manual relationships with additional edge attributes embeddings.index([{"id": "0", "text": "...", "relationships": [ {"id": "2", "type": "MEMBER_OF"} ]}]) ``` Additionally, graphs can be used for topic modeling. Dimensionality reduction with UMAP combined with HDBSCAN is a popular topic modeling method found in a number of libraries. txtai takes a different approach using community detection algorithms to build topic clusters. This approach has the advantage of only having to vectorize data once. It also has the advantage of better topic precision given there isn't a dimensionality reduction operation (UMAP). Semantic graph examples are shown below. Get a mapping of discovered topics to associated ids. ```python embeddings.graph.topics ``` Show the most central nodes in the index. ```python embeddings.graph.centrality() ``` Graphs are persisted alongside an embeddings index. Each save and load will also save and load the graph. ## Sparse vectors Scoring instances can create standalone [sparse keyword indexes](../configuration/general#keyword) (BM25, TF-IDF) and [sparse vector indexes](../configuration/general#sparse) (SPLADE). This enables [hybrid](../configuration/general/#hybrid) search when there is an available dense vector index. The terms `sparse vector index`, `keyword index`, `terms index` and `scoring index` are used interchangeably throughout txtai's documentation. See [this link](../../examples/#semantic-search) to learn more. ## Subindexes An embeddings instance can optionally have associated [subindexes](../configuration/general/#indexes), which are also embeddings databases. This enables indexing additional fields, vector models and much more.
## Word vectors When using [word vector backed models](../configuration/vectors#words) with scoring set, a separate call is required before calling `index` as follows: ```python embeddings.score(rows) embeddings.index(rows) ``` Both calls are required to support generator-backed iteration of data with word vector models. ================================================ FILE: docs/embeddings/methods.md ================================================ # Methods ::: txtai.embeddings.Embeddings options: filters: - "!columns" - "!createann" - "!createcloud" - "!createdatabase" - "!creategraph" - "!createids" - "!createindexes" - "!createscoring" - "!checkarchive" - "!configure" - "!defaultallowed" - "!defaults" - "!initindex" - "!loadquery" - "!loadvectors" ================================================ FILE: docs/embeddings/query.md ================================================ # Query guide ![query](../images/query.png#only-light) ![query](../images/query-dark.png#only-dark) This section covers how to query data with txtai. The simplest way to search for data is building a natural language string with the desired content to find. txtai also supports querying with SQL. We'll cover both methods here. ## Natural language queries In the simplest case, the query is text and the results are index text that is most similar to the query text. ```python embeddings.search("feel good story") embeddings.search("wildlife") ``` The queries above [search](../methods#txtai.embeddings.base.Embeddings.search) the index for similarity matches on `feel good story` and `wildlife`. If content storage is enabled, a list of `{**query columns}` is returned. Otherwise, a list of `(id, score)` tuples is returned. ## SQL txtai supports more complex queries with SQL. This is only supported if [content storage](../configuration/database#content) is enabled.
txtai has a translation layer that analyzes input SQL statements and combines similarity results with content stored in a relational database. SQL queries are run through `embeddings.search` like natural language queries but the examples below only show the SQL query for conciseness. ```python embeddings.search("SQL query") ``` ### Similar clause The similar clause is a txtai function that enables similarity searches with SQL. ```sql SELECT id, text, score FROM txtai WHERE similar('feel good story') ``` The similar clause takes the following arguments: ```sql similar("query", "number of candidates", "index", "weights") ``` | Argument | Description | | --------------------- | ---------------------------------------| | query | natural language query to run | | number of candidates | number of candidate results to return | | index | target index name | | weights | hybrid score weights | The txtai query layer joins results from two separate components, a relational store and a similarity index. With a similar clause, a similarity search is run and those ids are fed to the underlying database query. The number of candidates should be larger than the desired number of results when applying additional filter clauses. This ensures that `limit` results are still returned after applying additional filters. If the number of candidates is not specified, it is defaulted as follows: - For a single query filter clause, the default is the query limit - With multiple filtering clauses, the default is 10x the query limit The index name is only applicable when [subindexes](../configuration/general/#indexes) are enabled. This specifies the index to use for the query. Weights sets the hybrid score weights when an index has both a sparse and dense index. ### Dynamic columns Content can be indexed in multiple ways when content storage is enabled. [Remember that input documents](../#index) take the form of `(id, data, tags)` tuples. 
If data is a string or binary content, it's indexed and searchable with `similar()` clauses. If data is a dictionary, then all fields in the dictionary are stored and available via SQL. The `text` field or [field specified in the index configuration](../configuration/general/#columns) is indexed and searchable with `similar()` clauses. For example: ```python embeddings.index([{"text": "text to index", "flag": True, "actiondate": "2022-01-01"}]) ``` With the above input data, queries can now have more complex filters. ```sql SELECT text, flag, actiondate FROM txtai WHERE similar('query') AND flag = 1 AND actiondate >= '2022-01-01' ``` txtai's query layer automatically detects columns and translates queries into a format that can be understood by the underlying database. Nested dictionaries/JSON is supported and can be escaped with bracket statements. ```python embeddings.index([{"text": "text to index", "parent": {"child element": "abc"}}]) ``` ```sql SELECT text FROM txtai WHERE [parent.child element] = 'abc' ``` Note the bracket statement escaping the nested column with spaces in the name. ### Bind parameters txtai has support for SQL bind parameters. ```python # Query with a bind parameter for similar clause query = "SELECT id, text, score FROM txtai WHERE similar(:x)" results = embeddings.search(query, parameters={"x": "feel good story"}) # Query with a bind parameter for column filter query = "SELECT text, flag, actiondate FROM txtai WHERE flag = :x" results = embeddings.search(query, parameters={"x": 1}) ``` ### Aggregation queries The goal of txtai's query language is to closely support all functions in the underlying database engine. The main challenge is ensuring dynamic columns are properly escaped into the engine's native query function. Aggregation query examples.
```sql SELECT count(*) FROM txtai WHERE similar('feel good story') AND score >= 0.15 SELECT max(length(text)) FROM txtai WHERE similar('feel good story') AND score >= 0.15 SELECT count(*), flag FROM txtai GROUP BY flag ORDER BY count(*) DESC ``` ## Binary objects txtai has support for storing and retrieving binary objects. Binary objects can be retrieved as shown in the example below. ```python # Create embeddings index with content and object storage enabled embeddings = Embeddings(content=True, objects=True) # Get an image request = open("demo.gif", "rb") # Insert record embeddings.index([( "txtai", {"text": "txtai executes machine-learning workflows.", "object": request.read()} )]) # Query txtai and get associated object query = "SELECT object FROM txtai WHERE similar('machine learning') LIMIT 1" result = embeddings.search(query)[0]["object"] # Query binary content with a bind parameter query = "SELECT object FROM txtai WHERE similar(:x) LIMIT 1" results = embeddings.search(query, parameters={"x": request.read()}) ``` ## Custom SQL functions Custom, user-defined SQL functions extend selection, filtering and ordering clauses with additional logic. For example, the following snippet defines a function that translates text using a translation pipeline. ```python # Translation pipeline translate = Translation() # Create embeddings index embeddings = Embeddings(path="sentence-transformers/nli-mpnet-base-v2", content=True, functions=[translate]) # Run a search using a custom SQL function embeddings.search(""" SELECT text, translation(text, 'de', null) 'text (DE)', translation(text, 'es', null) 'text (ES)', translation(text, 'fr', null) 'text (FR)' FROM txtai WHERE similar('feel good story') LIMIT 1 """) ``` ## Expressions Expression shortcuts expand into more complex SQL snippets. This is useful for making SQL queries more concise. Indexing is also available on expressions as a performance improvement.
The following example indexes a json extraction field (`filepath`) and the length of each field. ```python # Create embeddings index embeddings = Embeddings( path="sentence-transformers/nli-mpnet-base-v2", content=True, expressions=[ {"name": "filepath", "index": True}, {"name": "textlength", "expression": "length(text)", "index": True} ] ) embeddings.search("SELECT textlength, filepath FROM txtai LIMIT 1") ``` ## Query translation Natural language queries with filters can be converted to txtai-compatible SQL statements with query translation. For example: ```python embeddings.search("feel good story since yesterday") ``` can be converted to a SQL statement with a similar clause and date filter. ```sql select id, text, score from txtai where similar('feel good story') and entry >= date('now', '-1 day') ``` This requires setting a [query translation model](../configuration/database#query). The default query translation model is [t5-small-txtsql](https://huggingface.co/NeuML/t5-small-txtsql) but this can easily be finetuned to handle different use cases. ## Hybrid search When an embeddings database has both a sparse and dense index, both indexes will be queried and the results will be equally weighted unless otherwise specified. ```python embeddings.search("query", weights=0.5) embeddings.search( "SELECT id, text, score FROM txtai WHERE similar('query', 0.5)" ) ``` ## Graph search If an embeddings database has an associated graph network, graph searches can be run. The search syntax below uses [openCypher](https://github.com/opencypher/openCypher). Follow the preceding link to learn more about this syntax. Additionally, standard embeddings searches can be returned as graphs. 
```python # Find all paths between id: 0 and id: 5 between 1 and 3 hops away embeddings.graph.search(""" MATCH P=({id: 0})-[*1..3]->({id: 5}) RETURN P """) # Standard embeddings search as graph embeddings.search("query", graph=True) ``` ## Subindexes Subindexes can be queried as follows: ```python # Build index with subindexes embeddings = Embeddings( content=True, defaults=False, indexes={ "keyword": { "keyword": True }, "dense":{ "dense": True } } ) embeddings.index(stream()) # Query with index parameter embeddings.search("query", index="keyword") # Specify with SQL embeddings.search(""" SELECT id, text, score FROM txtai WHERE similar('query', 'keyword') """) ``` ## Combined index architecture txtai has multiple storage and indexing components. Content is stored in an underlying database along with an approximate nearest neighbor (ANN) index, keyword index and graph network. These components combine to deliver similarity search alongside traditional structured search. The ANN index stores ids and vectors for each input element. When a natural language query is run, the query is translated into a vector and a similarity query finds the best matching ids. When a database is added into the mix, an additional step is executed. This step takes those ids and effectively inserts them as part of the underlying database query. The same steps apply with keyword indexes except a term frequency index is used to find the best matching ids. Dynamic columns are supported via the underlying engine. For SQLite, data is stored as JSON and dynamic columns are converted into `json_extract` clauses. Client-server databases are supported via [SQLAlchemy](https://docs.sqlalchemy.org/en/20/dialects/) and dynamic columns are supported provided the underlying engine has [JSON](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.JSON) support. 
================================================ FILE: docs/examples.md ================================================ # Examples ![examples](images/examples.png#only-light) ![examples](images/examples-dark.png#only-dark) See below for a comprehensive series of example notebooks and applications covering txtai. ## Semantic Search Build semantic/similarity/vector/neural search applications. | Notebook | Description | | |:----------|:-------------|------:| | [Introducing txtai](https://github.com/neuml/txtai/blob/master/examples/01_Introducing_txtai.ipynb) [▶️](https://www.youtube.com/watch?v=SIezMnVdmMs) | Overview of the functionality provided by txtai | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/01_Introducing_txtai.ipynb) | | [Build an Embeddings index with Hugging Face Datasets](https://github.com/neuml/txtai/blob/master/examples/02_Build_an_Embeddings_index_with_Hugging_Face_Datasets.ipynb) | Index and search Hugging Face Datasets | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/02_Build_an_Embeddings_index_with_Hugging_Face_Datasets.ipynb) | | [Build an Embeddings index from a data source](https://github.com/neuml/txtai/blob/master/examples/03_Build_an_Embeddings_index_from_a_data_source.ipynb) | Index and search a data source with word embeddings | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/03_Build_an_Embeddings_index_from_a_data_source.ipynb) | | [Add semantic search to Elasticsearch](https://github.com/neuml/txtai/blob/master/examples/04_Add_semantic_search_to_Elasticsearch.ipynb) | Add semantic search to existing search systems | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/04_Add_semantic_search_to_Elasticsearch.ipynb) | | [Similarity search with images](https://github.com/neuml/txtai/blob/master/examples/13_Similarity_search_with_images.ipynb) | Embed images and text into the same space for search | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/13_Similarity_search_with_images.ipynb) | | [Custom Embeddings SQL functions](https://github.com/neuml/txtai/blob/master/examples/30_Embeddings_SQL_custom_functions.ipynb) | Add user-defined functions to Embeddings SQL | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/30_Embeddings_SQL_custom_functions.ipynb) | | [Model explainability](https://github.com/neuml/txtai/blob/master/examples/32_Model_explainability.ipynb) | Explainability for semantic search | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/32_Model_explainability.ipynb) | | [Query translation](https://github.com/neuml/txtai/blob/master/examples/33_Query_translation.ipynb) | Domain-specific natural language queries with query translation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/33_Query_translation.ipynb) | | [Build a QA database](https://github.com/neuml/txtai/blob/master/examples/34_Build_a_QA_database.ipynb) | Question matching with semantic search | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/34_Build_a_QA_database.ipynb) | | [Semantic 
Graphs](https://github.com/neuml/txtai/blob/master/examples/38_Introducing_the_Semantic_Graph.ipynb) | Explore topics, data connectivity and run network analysis| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/38_Introducing_the_Semantic_Graph.ipynb) | | [Topic Modeling with BM25](https://github.com/neuml/txtai/blob/master/examples/39_Classic_Topic_Modeling_with_BM25.ipynb) | Topic modeling backed by a BM25 index | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/39_Classic_Topic_Modeling_with_BM25.ipynb) | ## LLM Autonomous agents, retrieval augmented generation (RAG), chat with your data, pipelines and workflows that interface with large language models (LLMs). | Notebook | Description | | |:----------|:-------------|------:| | [Prompt-driven search with LLMs](https://github.com/neuml/txtai/blob/master/examples/42_Prompt_driven_search_with_LLMs.ipynb) | Embeddings-guided and Prompt-driven search with Large Language Models (LLMs) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/42_Prompt_driven_search_with_LLMs.ipynb) | | [Prompt templates and task chains](https://github.com/neuml/txtai/blob/master/examples/44_Prompt_templates_and_task_chains.ipynb) | Build model prompts and connect tasks together with workflows | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/44_Prompt_templates_and_task_chains.ipynb) | | [Build RAG pipelines with txtai](https://github.com/neuml/txtai/blob/master/examples/52_Build_RAG_pipelines_with_txtai.ipynb) [▶️](https://www.youtube.com/watch?v=t_OeAc8NVfQ) | Guide on retrieval augmented generation including how to create citations | 
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/52_Build_RAG_pipelines_with_txtai.ipynb) | | [Integrate LLM frameworks](https://github.com/neuml/txtai/blob/master/examples/53_Integrate_LLM_Frameworks.ipynb) | Integrate llama.cpp, LiteLLM and custom generation frameworks | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/53_Integrate_LLM_Frameworks.ipynb) | | [Generate knowledge with Semantic Graphs and RAG](https://github.com/neuml/txtai/blob/master/examples/55_Generate_knowledge_with_Semantic_Graphs_and_RAG.ipynb) | Knowledge exploration and discovery with Semantic Graphs and RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/55_Generate_knowledge_with_Semantic_Graphs_and_RAG.ipynb) | | [Build knowledge graphs with LLMs](https://github.com/neuml/txtai/blob/master/examples/57_Build_knowledge_graphs_with_LLM_driven_entity_extraction.ipynb) | Build knowledge graphs with LLM-driven entity extraction | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/57_Build_knowledge_graphs_with_LLM_driven_entity_extraction.ipynb) | | [Advanced RAG with graph path traversal](https://github.com/neuml/txtai/blob/master/examples/58_Advanced_RAG_with_graph_path_traversal.ipynb) | Graph path traversal to collect complex sets of data for advanced RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/58_Advanced_RAG_with_graph_path_traversal.ipynb) | | [Advanced RAG with guided 
generation](https://github.com/neuml/txtai/blob/master/examples/60_Advanced_RAG_with_guided_generation.ipynb) | Retrieval Augmented and Guided Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/60_Advanced_RAG_with_guided_generation.ipynb) | | [RAG with llama.cpp and external API services](https://github.com/neuml/txtai/blob/master/examples/62_RAG_with_llama_cpp_and_external_API_services.ipynb) | RAG with additional vector and LLM frameworks | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/62_RAG_with_llama_cpp_and_external_API_services.ipynb) | | [How RAG with txtai works](https://github.com/neuml/txtai/blob/master/examples/63_How_RAG_with_txtai_works.ipynb) | Create RAG processes, API services and Docker instances | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/63_How_RAG_with_txtai_works.ipynb) | | [Speech to Speech RAG](https://github.com/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) [▶️](https://www.youtube.com/watch?v=tH8QWwkVMKA) | Full cycle speech to speech workflow with RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) | | [Analyzing Hugging Face Posts with Graphs and Agents](https://github.com/neuml/txtai/blob/master/examples/68_Analyzing_Hugging_Face_Posts_with_Graphs_and_Agents.ipynb) | Explore a rich dataset with Graph Analysis and Agents | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/68_Analyzing_Hugging_Face_Posts_with_Graphs_and_Agents.ipynb) | | [Granting autonomy to 
agents](https://github.com/neuml/txtai/blob/master/examples/69_Granting_autonomy_to_agents.ipynb) | Agents that iteratively solve problems as they see fit | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/69_Granting_autonomy_to_agents.ipynb) | | [Getting started with LLM APIs](https://github.com/neuml/txtai/blob/master/examples/70_Getting_started_with_LLM_APIs.ipynb) | Generate embeddings and run LLMs with OpenAI, Claude, Gemini, Bedrock and more | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/70_Getting_started_with_LLM_APIs.ipynb) | | [Analyzing LinkedIn Company Posts with Graphs and Agents](https://github.com/neuml/txtai/blob/master/examples/71_Analyzing_LinkedIn_Company_Posts_with_Graphs_and_Agents.ipynb) | Exploring how to improve social media engagement with AI | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/71_Analyzing_LinkedIn_Company_Posts_with_Graphs_and_Agents.ipynb) | | [Parsing the stars with txtai](https://github.com/neuml/txtai/blob/master/examples/72_Parsing_the_stars_with_txtai.ipynb) | Explore an astronomical knowledge graph of known stars, planets, galaxies | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/72_Parsing_the_stars_with_txtai.ipynb) | | [Chunking your data for RAG](https://github.com/neuml/txtai/blob/master/examples/73_Chunking_your_data_for_RAG.ipynb) | Extract, chunk and index content for effective retrieval | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/73_Chunking_your_data_for_RAG.ipynb) | | [Medical RAG Research 
with txtai](https://github.com/neuml/txtai/blob/master/examples/75_Medical_RAG_Research_with_txtai.ipynb) | Analyze PubMed article metadata with RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/75_Medical_RAG_Research_with_txtai.ipynb) | | [GraphRAG with Wikipedia and GPT OSS](https://github.com/neuml/txtai/blob/master/examples/77_GraphRAG_with_Wikipedia_and_GPT_OSS.ipynb) | Deep graph search powered RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/77_GraphRAG_with_Wikipedia_and_GPT_OSS.ipynb) | | [RAG is more than Vector Search](https://github.com/neuml/txtai/blob/master/examples/79_RAG_is_more_than_Vector_Search.ipynb) | Context retrieval via Web, SQL and other sources | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/79_RAG_is_more_than_Vector_Search.ipynb) | | [OpenCode as a txtai LLM](https://github.com/neuml/txtai/blob/master/examples/81_OpenCode_as_a_txtai_LLM.ipynb) | Integrate OpenCode with the txtai ecosystem | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/81_OpenCode_as_a_txtai_LLM.ipynb) | | [Agentic College Search](https://github.com/neuml/txtai/blob/master/examples/82_Agentic_College_Search.ipynb) | Identify a list of strong engineering colleges | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/82_Agentic_College_Search.ipynb) | | [TxtAI got skills](https://github.com/neuml/txtai/blob/master/examples/83_TxtAI_got_skills.ipynb) | Integrate skill.md files with your agent | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/83_TxtAI_got_skills.ipynb) | | [Agent Tools](https://github.com/neuml/txtai/blob/master/examples/84_Agent_Tools.ipynb) [▶️](https://www.youtube.com/watch?v=RDNaFXQy3GQ) | Learn about the txtai agent toolkit | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/84_Agent_Tools.ipynb) | ## Pipelines Transform data with language model backed pipelines. | Notebook | Description | | |:----------|:-------------|------:| | [Extractive QA with txtai](https://github.com/neuml/txtai/blob/master/examples/05_Extractive_QA_with_txtai.ipynb) | Introduction to extractive question-answering with txtai | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/05_Extractive_QA_with_txtai.ipynb) | | [Extractive QA with Elasticsearch](https://github.com/neuml/txtai/blob/master/examples/06_Extractive_QA_with_Elasticsearch.ipynb) | Run extractive question-answering queries with Elasticsearch | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/06_Extractive_QA_with_Elasticsearch.ipynb) | | [Extractive QA to build structured data](https://github.com/neuml/txtai/blob/master/examples/20_Extractive_QA_to_build_structured_data.ipynb) | Build structured datasets using extractive question-answering | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/20_Extractive_QA_to_build_structured_data.ipynb) | | [Apply labels with zero shot classification](https://github.com/neuml/txtai/blob/master/examples/07_Apply_labels_with_zero_shot_classification.ipynb) | Use zero shot 
learning for labeling, classification and topic modeling | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/07_Apply_labels_with_zero_shot_classification.ipynb) | | [Building abstractive text summaries](https://github.com/neuml/txtai/blob/master/examples/09_Building_abstractive_text_summaries.ipynb) | Run abstractive text summarization | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/09_Building_abstractive_text_summaries.ipynb) | | [Extract text from documents](https://github.com/neuml/txtai/blob/master/examples/10_Extract_text_from_documents.ipynb) | Extract text from PDF, Office, HTML and more | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/10_Extract_text_from_documents.ipynb) | | [Text to speech generation](https://github.com/neuml/txtai/blob/master/examples/40_Text_to_Speech_Generation.ipynb) | Generate speech from text | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/40_Text_to_Speech_Generation.ipynb) | | [Transcribe audio to text](https://github.com/neuml/txtai/blob/master/examples/11_Transcribe_audio_to_text.ipynb) | Convert audio files to text | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/11_Transcribe_audio_to_text.ipynb) | | [Translate text between languages](https://github.com/neuml/txtai/blob/master/examples/12_Translate_text_between_languages.ipynb) | Streamline machine translation and language detection | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/12_Translate_text_between_languages.ipynb) | | [Generate image captions and detect objects](https://github.com/neuml/txtai/blob/master/examples/25_Generate_image_captions_and_detect_objects.ipynb) | Captions and object detection for images | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/25_Generate_image_captions_and_detect_objects.ipynb) | | [Near duplicate image detection](https://github.com/neuml/txtai/blob/master/examples/31_Near_duplicate_image_detection.ipynb) | Identify duplicate and near-duplicate images | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/31_Near_duplicate_image_detection.ipynb) | ## Workflows Efficiently process data at scale. 
| Notebook | Description | | |:----------|:-------------|------:| | [Run pipeline workflows](https://github.com/neuml/txtai/blob/master/examples/14_Run_pipeline_workflows.ipynb) [▶️](https://www.youtube.com/watch?v=UBMPDCn1gEU) | Simple yet powerful constructs to efficiently process data | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/14_Run_pipeline_workflows.ipynb) | | [Transform tabular data with composable workflows](https://github.com/neuml/txtai/blob/master/examples/22_Transform_tabular_data_with_composable_workflows.ipynb) | Transform, index and search tabular data | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/22_Transform_tabular_data_with_composable_workflows.ipynb) | | [Tensor workflows](https://github.com/neuml/txtai/blob/master/examples/23_Tensor_workflows.ipynb) | Performant processing of large tensor arrays | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/23_Tensor_workflows.ipynb) | | [Entity extraction workflows](https://github.com/neuml/txtai/blob/master/examples/26_Entity_extraction_workflows.ipynb) | Identify entity/label combinations | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/26_Entity_extraction_workflows.ipynb) | | [Workflow Scheduling](https://github.com/neuml/txtai/blob/master/examples/27_Workflow_scheduling.ipynb) | Schedule workflows with cron expressions | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/27_Workflow_scheduling.ipynb) | | [Push notifications with 
workflows](https://github.com/neuml/txtai/blob/master/examples/28_Push_notifications_with_workflows.ipynb) | Generate and push notifications with workflows | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/28_Push_notifications_with_workflows.ipynb) | | [Pictures are a worth a thousand words](https://github.com/neuml/txtai/blob/master/examples/35_Pictures_are_worth_a_thousand_words.ipynb) | Generate webpage summary images with DALL-E mini | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/35_Pictures_are_worth_a_thousand_words.ipynb) | | [Run txtai with native code](https://github.com/neuml/txtai/blob/master/examples/36_Run_txtai_in_native_code.ipynb) | Execute workflows in native code with the Python C API | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/36_Run_txtai_in_native_code.ipynb) | | [Generative Audio](https://github.com/neuml/txtai/blob/master/examples/66_Generative_Audio.ipynb) | Storytelling with generative audio workflows | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/66_Generative_Audio.ipynb) | ## Model Training Train, distill, fine-tune and export models. 
| Notebook | Description | | |:----------|:-------------|------:| | [Train a text labeler](https://github.com/neuml/txtai/blob/master/examples/16_Train_a_text_labeler.ipynb) | Build text sequence classification models | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/16_Train_a_text_labeler.ipynb) | | [Train without labels](https://github.com/neuml/txtai/blob/master/examples/17_Train_without_labels.ipynb) | Use zero-shot classifiers to train new models | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/17_Train_without_labels.ipynb) | | [Train a QA model](https://github.com/neuml/txtai/blob/master/examples/19_Train_a_QA_model.ipynb) | Build and fine-tune question-answering models | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/19_Train_a_QA_model.ipynb) | | [Train a language model from scratch](https://github.com/neuml/txtai/blob/master/examples/41_Train_a_language_model_from_scratch.ipynb) | Build new language models | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/41_Train_a_language_model_from_scratch.ipynb) | | [Distilling Knowledge into Tiny LLMs](https://github.com/neuml/txtai/blob/master/examples/80_Distilling_Knowledge_into_Tiny_LLMs.ipynb) [▶️](https://www.youtube.com/watch?v=Ol560ktgkf0) | Finetune tiny LLMs to enable inference using less resources | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/80_Distilling_Knowledge_into_Tiny_LLMs.ipynb) | | [Export and run models with 
ONNX](https://github.com/neuml/txtai/blob/master/examples/18_Export_and_run_models_with_ONNX.ipynb) | Export models with ONNX, run natively in JavaScript, Java and Rust | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/18_Export_and_run_models_with_ONNX.ipynb) | | [Export and run other machine learning models](https://github.com/neuml/txtai/blob/master/examples/21_Export_and_run_other_machine_learning_models.ipynb) | Export and run models from scikit-learn, PyTorch and more | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/21_Export_and_run_other_machine_learning_models.ipynb) | ## API Run distributed txtai, integrate with the API and cloud endpoints. | Notebook | Description | | |:----------|:-------------|------:| | [API Gallery](https://github.com/neuml/txtai/blob/master/examples/08_API_Gallery.ipynb) | Using txtai in JavaScript, Java, Rust and Go | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/08_API_Gallery.ipynb) | | [Distributed embeddings cluster](https://github.com/neuml/txtai/blob/master/examples/15_Distributed_embeddings_cluster.ipynb) | Distribute an embeddings index across multiple data nodes | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/15_Distributed_embeddings_cluster.ipynb) | | [Embeddings in the Cloud](https://github.com/neuml/txtai/blob/master/examples/43_Embeddings_in_the_Cloud.ipynb) | Load and use an embeddings index from the Hugging Face Hub | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/43_Embeddings_in_the_Cloud.ipynb) | | 
[Custom API Endpoints](https://github.com/neuml/txtai/blob/master/examples/51_Custom_API_Endpoints.ipynb) | Extend the API with custom endpoints | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/51_Custom_API_Endpoints.ipynb) | | [API Authorization and Authentication](https://github.com/neuml/txtai/blob/master/examples/54_API_Authorization_and_Authentication.ipynb) | Add authorization, authentication and middleware dependencies to the API | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/54_API_Authorization_and_Authentication.ipynb) | | [OpenAI Compatible API](https://github.com/neuml/txtai/blob/master/examples/74_OpenAI_Compatible_API.ipynb) | Connect to txtai with a standard OpenAI client library | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/74_OpenAI_Compatible_API.ipynb) | ## Architecture Project architecture, data formats, external integrations, scale to production, benchmarks, and performance. 
| Notebook | Description | | |:----------|:-------------|------:| | [Anatomy of a txtai index](https://github.com/neuml/txtai/blob/master/examples/29_Anatomy_of_a_txtai_index.ipynb) | Deep dive into the file formats behind a txtai embeddings index | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/29_Anatomy_of_a_txtai_index.ipynb) | | [Embeddings components](https://github.com/neuml/txtai/blob/master/examples/37_Embeddings_index_components.ipynb) | Composable search with vector, SQL and scoring components | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/37_Embeddings_index_components.ipynb) | | [Customize your own embeddings database](https://github.com/neuml/txtai/blob/master/examples/45_Customize_your_own_embeddings_database.ipynb) | Ways to combine vector indexes with relational databases | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/45_Customize_your_own_embeddings_database.ipynb) | | [Building an efficient sparse keyword index in Python](https://github.com/neuml/txtai/blob/master/examples/47_Building_an_efficient_sparse_keyword_index_in_Python.ipynb) | Fast and accurate sparse keyword indexing | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/47_Building_an_efficient_sparse_keyword_index_in_Python.ipynb) | | [Benefits of hybrid search](https://github.com/neuml/txtai/blob/master/examples/48_Benefits_of_hybrid_search.ipynb) | Improve accuracy with a combination of semantic and keyword search | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/48_Benefits_of_hybrid_search.ipynb) | | [External database integration](https://github.com/neuml/txtai/blob/master/examples/49_External_database_integration.ipynb) | Store metadata in PostgreSQL, MariaDB, MySQL and more | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/49_External_database_integration.ipynb) | | [All about vector quantization](https://github.com/neuml/txtai/blob/master/examples/50_All_about_vector_quantization.ipynb) | Benchmarking scalar and product quantization methods | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/50_All_about_vector_quantization.ipynb) | | [External vectorization](https://github.com/neuml/txtai/blob/master/examples/56_External_vectorization.ipynb) | Vectorization with precomputed embeddings datasets and APIs | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/56_External_vectorization.ipynb) | | [Integrate txtai with Postgres](https://github.com/neuml/txtai/blob/master/examples/61_Integrate_txtai_with_Postgres.ipynb) | Persist content, vectors and graph data in Postgres | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/61_Integrate_txtai_with_Postgres.ipynb) | | [Embeddings index format for open data access](https://github.com/neuml/txtai/blob/master/examples/64_Embeddings_index_format_for_open_data_access.ipynb) | Platform and programming language independent data storage with txtai | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/64_Embeddings_index_format_for_open_data_access.ipynb) | | [Accessing Low Level Vector APIs](https://github.com/neuml/txtai/blob/master/examples/78_Accessing_Low_Level_Vector_APIs.ipynb) | Build a vector database using txtai's low-level APIs | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/78_Accessing_Low_Level_Vector_APIs.ipynb) | ## Releases New functionality added in major releases. | Notebook | Description | | |:----------|:-------------|------:| | [What's new in txtai 4.0](https://github.com/neuml/txtai/blob/master/examples/24_Whats_new_in_txtai_4_0.ipynb) | Content storage, SQL, object storage, reindex and compressed indexes | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/24_Whats_new_in_txtai_4_0.ipynb) | | [What's new in txtai 6.0](https://github.com/neuml/txtai/blob/master/examples/46_Whats_new_in_txtai_6_0.ipynb) | Sparse, hybrid and subindexes for embeddings, LLM improvements | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/46_Whats_new_in_txtai_6_0.ipynb) | | [What's new in txtai 7.0](https://github.com/neuml/txtai/blob/master/examples/59_Whats_new_in_txtai_7_0.ipynb) | Semantic graph 2.0, LoRA/QLoRA training and binary API support | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/59_Whats_new_in_txtai_7_0.ipynb) | | [What's new in txtai 8.0](https://github.com/neuml/txtai/blob/master/examples/67_Whats_new_in_txtai_8_0.ipynb) | Agents with txtai | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/67_Whats_new_in_txtai_8_0.ipynb) | | [What's new in txtai 9.0](https://github.com/neuml/txtai/blob/master/examples/76_Whats_new_in_txtai_9_0.ipynb) | Learned sparse vectors, late interaction models and rerankers | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/76_Whats_new_in_txtai_9_0.ipynb) | ## Applications Series of example applications with txtai. Links to hosted versions on [Hugging Face Spaces](https://hf.co/spaces) are also provided, when available. | Application | Description | | |:-------------|:-------------|------:| | [Basic similarity search](https://github.com/neuml/txtai/blob/master/examples/similarity.py) | Basic similarity search example. Data from the original txtai demo. |[🤗](https://hf.co/spaces/NeuML/similarity)| | [Baseball stats](https://github.com/neuml/txtai/blob/master/examples/baseball.py) | Match historical baseball player stats using vector search. |[🤗](https://hf.co/spaces/NeuML/baseball)| | [Benchmarks](https://github.com/neuml/txtai/blob/master/examples/benchmarks.py) | Calculate performance metrics for the BEIR datasets. |*Local run only*| | [Book search](https://github.com/neuml/txtai/blob/master/examples/books.py) | Book similarity search application. Index book descriptions and query using natural language statements. |*Local run only*| | [Image search](https://github.com/neuml/txtai/blob/master/examples/images.py) | Image similarity search application. Index a directory of images and run searches to identify images similar to the input query. |[🤗](https://hf.co/spaces/NeuML/imagesearch)| | [Retrieval Augmented Generation](https://github.com/neuml/rag/) | RAG with txtai embeddings databases. Ask questions and get answers from LLMs bound by a context. 
|*Local run only*| | [Summarize an article](https://github.com/neuml/txtai/blob/master/examples/article.py) | Summarize an article. Workflow that extracts text from a webpage and builds a summary. |[🤗](https://hf.co/spaces/NeuML/articlesummary)| | [Wiki search](https://github.com/neuml/txtai/blob/master/examples/wiki.py) | Wikipedia search application. Queries Wikipedia API and summarizes the top result. |[🤗](https://hf.co/spaces/NeuML/wikisummary)| | [Workflow builder](https://github.com/neuml/txtai/blob/master/examples/workflows.py) | Build and execute txtai workflows. Connect summarization, text extraction, transcription, translation and similarity search pipelines together to run unified workflows. |[🤗](https://hf.co/spaces/NeuML/txtai)| ================================================ FILE: docs/faq.md ================================================ # FAQ ![faq](images/faq.png) Below is a list of frequently asked questions and common issues encountered. ## Questions ---------- __Question__ What models are recommended? __Answer__ See the [model guide](../models). ---------- __Question__ What is the best way to track the progress of an `embeddings.index` call? __Answer__ Wrap the list or generator passed to the index call with tqdm. See [#478](https://github.com/neuml/txtai/issues/478) for more. ---------- __Question__ What is the best way to analyze and debug a txtai process? __Answer__ See the [observability](../observability) section for more on how this can be enabled in txtai processes. txtai also has a console application. [This article](https://medium.com/neuml/insights-from-the-txtai-console-d307c28e149e) has more details. ---------- __Question__ How can models be externally loaded and passed to embeddings and pipelines? __Answer__ Embeddings example. 
```python from transformers import AutoModel, AutoTokenizer from txtai import Embeddings # Load model externally model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") # Pass to embeddings instance embeddings = Embeddings(path=model, tokenizer=tokenizer) ``` LLM pipeline example. ```python import torch from transformers import AutoModelForCausalLM, AutoTokenizer from txtai import LLM # Load Qwen3 0.6B path = "Qwen/Qwen3-0.6B" model = AutoModelForCausalLM.from_pretrained( path, dtype=torch.bfloat16, ) tokenizer = AutoTokenizer.from_pretrained(path) llm = LLM((model, tokenizer)) ``` ## Common issues ---------- __Issue__ Embeddings query errors like this: ``` SQLError: no such function: json_extract ``` __Solution__ Upgrade Python version as it doesn't have SQLite support for `json_extract` ---------- __Issue__ Segmentation faults and similar errors on macOS __Solution__ Set the following environment parameters. - Disable OpenMP multithreading via `export OMP_NUM_THREADS=1` - Workaround `OMP: Error #15` errors via `export KMP_DUPLICATE_LIB_OK=TRUE` - Disable PyTorch MPS device via `export PYTORCH_MPS_DISABLE=1` - Disable llama.cpp metal via `export LLAMA_NO_METAL=1` For more details, refer to [this issue on GitHub](https://github.com/kyamagu/faiss-wheels/issues/73). ---------- __Issue__ Error running SQLite ANN on macOS ``` AttributeError: 'sqlite3.Connection' object has no attribute 'enable_load_extension' ``` __Solution__ See [this note](https://alexgarcia.xyz/sqlite-vec/python.html#macos-blocks-sqlite-extensions-by-default) for options on how to fix this. ---------- __Issue__ `ContextualVersionConflict` and/or package METADATA exception while running one of the [examples](../examples) notebooks on Google Colab __Solution__ Restart the kernel. See issue [#409](https://github.com/neuml/txtai/issues/409) for more on this issue. 
---------- __Issue__ Error installing optional/extra dependencies such as `pipeline` __Solution__ The default MacOS shell (zsh) and Windows PowerShell require escaping square brackets ``` pip install 'txtai[pipeline]' ``` ================================================ FILE: docs/further.md ================================================ # Further reading ![further](images/further.png#only-light) ![further](images/further-dark.png#only-dark) - [Introducing txtai, the all-in-one AI framework](https://medium.com/neuml/introducing-txtai-the-all-in-one-ai-framework-0660ecfc39d7) - [Tutorial series on Hashnode](https://neuml.hashnode.dev/series/txtai-tutorial) | [dev.to](https://dev.to/neuml/tutorial-series-on-txtai-ibg) - [What's new in txtai 9.0](https://medium.com/neuml/whats-new-in-txtai-9-0-d522bb150afa) | [8.0](https://medium.com/neuml/whats-new-in-txtai-8-0-2d7d0ab4506b) | [7.0](https://medium.com/neuml/whats-new-in-txtai-7-0-855ad6a55440) | [6.0](https://medium.com/neuml/whats-new-in-txtai-6-0-7d93eeedf804) | [5.0](https://medium.com/neuml/whats-new-in-txtai-5-0-e5c75a13b101) | [4.0](https://medium.com/neuml/whats-new-in-txtai-4-0-bbc3a65c3d1c) - [Getting started with semantic search](https://medium.com/neuml/getting-started-with-semantic-search-a9fd9d8a48cf) | [workflows](https://medium.com/neuml/getting-started-with-semantic-workflows-2fefda6165d9) | [rag](https://medium.com/neuml/getting-started-with-rag-9a0cca75f748) - [Running txtai at scale](https://medium.com/neuml/running-at-scale-with-txtai-71196cdd99f9) - [Vector search & RAG Landscape: A review with txtai](https://medium.com/neuml/vector-search-rag-landscape-a-review-with-txtai-a7f37ad0e187) ================================================ FILE: docs/images/agent.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "text", "version": 1781, "versionNonce": 1504167660, "isDeleted": false, "id": 
"yF7ftUwr3mnAwOlC59RMi", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1746.949005220418, "y": 32.708022189384565, "strokeColor": "#7950f2", "backgroundColor": "transparent", "width": 212.06666666666666, "height": 25.420547029684837, "seed": 376471794, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1731090696250, "link": null, "locked": false, "fontSize": 21.18378919140403, "fontFamily": 1, "text": "Tools and Functions", "textAlign": "center", "verticalAlign": "top", "containerId": null, "originalText": "Tools and Functions", "index": "a1", "frameId": null, "autoResize": true, "lineHeight": 1.2 }, { "type": "line", "version": 5394, "versionNonce": 1005349228, "isDeleted": false, "id": "aEfZn91vFxUsNoiS_231h", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1818.1429109642538, "y": -209.6883063766287, "strokeColor": "#ff7043", "backgroundColor": "#fa5252", "width": 62.46534181999921, "height": 80.62144046844392, "seed": 283082348, "groupIds": [ "YlFNaYy8uxiVBIOqH078l" ], "roundness": { "type": 2 }, "boundElements": [], "updated": 1731090626713, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 0.20597861858336483, 60.93324773178684 ], [ 0.009639315807164919, 67.87023338578067 ], [ 3.2171032549927925, 70.86761910642136 ], [ 14.386936381131346, 73.4043779800946 ], [ 33.26711841453209, 74.19428679833626 ], [ 51.30588383530208, 72.93308166143258 ], [ 60.89001271988386, 69.91693238712155 ], [ 62.241724409786244, 67.37428690837503 ], [ 62.431567423392536, 61.78937034530568 ], [ 62.2825627324806, 5.111959380832905 ], [ 61.946658330498884, -0.24301167156979483 ], [ 57.93575321694209, -3.2359402189020954 ], [ 49.489643010028544, -4.969286064540119 ], [ 30.242099023560478, -6.427153670107652 ], [ 
14.810450273983571, -5.557823934240758 ], [ 2.6735633343171465, -2.609163541657122 ], [ -0.033774396606671454, -0.03661258119549431 ], [ 0, 0 ] ], "index": "a3", "frameId": null }, { "type": "line", "version": 3136, "versionNonce": 1274305516, "isDeleted": false, "id": "JSkCiTP6nfh3_0WXfktLt", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1821.286624574959, "y": -163.7331019656284, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 57.79381935156673, "height": 6.618955975035558, "seed": 465359084, "groupIds": [ "YlFNaYy8uxiVBIOqH078l" ], "roundness": { "type": 2 }, "boundElements": [], "updated": 1731090626713, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 1.6474005758537682, 2.765528506120729 ], [ 8.751958185556267, 5.085781507915724 ], [ 18.205684820748022, 6.490912696060695 ], [ 33.01612829072934, 6.618955975035558 ], [ 50.29940758591302, 5.724372353349996 ], [ 57.79381935156673, 3.769506661359109 ], [ 56.89849492771129, 4.013173323127235 ] ], "index": "a4", "frameId": null }, { "type": "line", "version": 3222, "versionNonce": 727318124, "isDeleted": false, "id": "XbZa4_v84aJkwUbHNYtWo", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1817.0033279434713, "y": -183.2009370605118, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 62.53013535500473, "height": 6.9378137498265104, "seed": 877384556, "groupIds": [ "YlFNaYy8uxiVBIOqH078l" ], "roundness": { "type": 2 }, "boundElements": [], "updated": 1731090626713, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 1.647400575853768, 2.765528506120729 ], [ 8.751958185556267, 5.085781507915719 ], [ 
18.20568482074802, 6.490912696060696 ], [ 33.016128290729334, 6.618955975035559 ], [ 50.29940758591301, 5.7243723533499935 ], [ 60.393038010317625, 2.4698973319836566 ], [ 62.53013535500473, -0.3188577747909514 ] ], "index": "a5", "frameId": null }, { "type": "ellipse", "version": 6241, "versionNonce": 48713964, "isDeleted": false, "id": "WgZ_UNGw6chJCNh_YqBCg", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1818.1477872422097, "y": -216.22048622222513, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 62.06467888763865, "height": 12.55211437327341, "seed": 416712172, "groupIds": [ "YlFNaYy8uxiVBIOqH078l" ], "roundness": null, "boundElements": [], "updated": 1731090626713, "link": null, "locked": false, "index": "a6", "frameId": null }, { "type": "ellipse", "version": 1583, "versionNonce": 1378243436, "isDeleted": false, "id": "5zDzUgYAzqCuHz5y0fEbO", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1867.2486508066536, "y": -195.2355116320403, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 9.096173632176203, "height": 9.872132933859838, "seed": 778496108, "groupIds": [ "YlFNaYy8uxiVBIOqH078l" ], "roundness": null, "boundElements": [], "updated": 1731090626713, "link": null, "locked": false, "index": "a7", "frameId": null }, { "type": "ellipse", "version": 1632, "versionNonce": 392234476, "isDeleted": false, "id": "VtzlusDzgSLcDDBt5hG2x", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1867.2486508066536, "y": -173.56619840583764, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 9.096173632176203, "height": 9.872132933859838, "seed": 684129004, "groupIds": [ "YlFNaYy8uxiVBIOqH078l" ], "roundness": null, "boundElements": [], "updated": 1731090626713, "link": null, "locked": false, "index": "a8", "frameId": 
null }, { "type": "ellipse", "version": 1685, "versionNonce": 2007066732, "isDeleted": false, "id": "B_0DhVcOs-USQL4XV0QRo", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1867.2486508066536, "y": -150.01453414636433, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 9.096173632176203, "height": 9.872132933859838, "seed": 847297900, "groupIds": [ "YlFNaYy8uxiVBIOqH078l" ], "roundness": null, "boundElements": [], "updated": 1731090626713, "link": null, "locked": false, "index": "a9", "frameId": null }, { "type": "text", "version": 1457, "versionNonce": 1722907372, "isDeleted": false, "id": "4zMdwTCbTrrgg-ITOMXC0", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1779.9259357205947, "y": -120.42699264611412, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 136.73556714551438, "height": 25.535793939087647, "seed": 1848588206, "groupIds": [], "roundness": null, "boundElements": [ { "id": "leDc6Y5qdU0OH3j90C072", "type": "arrow" } ], "updated": 1731090626713, "link": null, "locked": false, "fontSize": 21.27982828257304, "fontFamily": 1, "text": "Data Stores", "textAlign": "center", "verticalAlign": "top", "containerId": null, "originalText": "Data Stores", "index": "aA", "frameId": null, "autoResize": true, "lineHeight": 1.2 }, { "type": "arrow", "version": 3697, "versionNonce": 368624620, "isDeleted": false, "id": "iPTphyfJkvqyMptaZS0ge", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1814.6382080826127, "y": -173.3228158450134, "strokeColor": "#ff7043", "backgroundColor": "#7950f2", "width": 182.4871709755887, "height": 81.80578637477413, "seed": 1965679388, "groupIds": [], "roundness": { "type": 2 }, "boundElements": [], "updated": 1731090676915, "link": null, "locked": false, "startBinding": null, "endBinding": { "elementId": 
"V54vdkZmQeI13AEahvNPV", "focus": 0.1525458549344351, "gap": 4.034063550289034, "fixedPoint": null }, "lastCommittedPoint": null, "startArrowhead": "arrow", "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ -182.4871709755887, 81.80578637477413 ] ], "index": "aB", "frameId": null }, { "type": "text", "version": 1522, "versionNonce": 2052237012, "isDeleted": false, "id": "YCEwpQiKAjRS3zIG3_E8s", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1555.5109174458237, "y": -43.50932007711572, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 38.06666666666666, "height": 23.223174527172613, "seed": 676249636, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1731090662600, "link": null, "locked": false, "fontSize": 19.35264543931051, "fontFamily": 1, "text": "LLM", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "LLM", "index": "aC", "frameId": null, "autoResize": true, "lineHeight": 1.2 }, { "type": "arrow", "version": 5294, "versionNonce": 1355936620, "isDeleted": false, "id": "leDc6Y5qdU0OH3j90C072", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1816.227500619918, "y": -9.184012279360502, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 182.7150865033991, "height": 52.05945128870996, "seed": 241264412, "groupIds": [], "roundness": { "type": 2 }, "boundElements": [], "updated": 1731090696850, "link": null, "locked": false, "startBinding": { "elementId": "qzVAVN6sXRxG16958WgBM", "focus": -0.358596672811187, "gap": 2.5501158014194516, "fixedPoint": null }, "endBinding": { "elementId": "V54vdkZmQeI13AEahvNPV", "focus": 0.704166265794774, "gap": 5.395440559783879, "fixedPoint": null }, "lastCommittedPoint": null, "startArrowhead": "arrow", "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ -182.7150865033991, -52.05945128870996 ] ], "index": "aD", "frameId": null 
}, { "type": "line", "version": 2515, "versionNonce": 679390292, "isDeleted": false, "id": "aFBJSnHUVo8ZDTb0uAkQ2", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1310.644313207792, "y": -121.5611895708457, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 99.33023905537627, "height": 88.43543879744573, "seed": 1963890941, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1731090662600, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -53.27565065744737, 16.38089846974506 ], [ -53.90255959959227, 67.77711223685318 ], [ -6.4999879824429385, 88.43543879744573 ], [ 44.3351035833632, 67.53847405928806 ], [ 45.42767945578399, 18.161010280772913 ], [ 0, 0 ] ], "index": "aE", "frameId": null }, { "type": "text", "version": 1046, "versionNonce": 235798996, "isDeleted": false, "id": "eqOUg003X-50uDJ_XGUnB", "fillStyle": "solid", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1234.634080487935, "y": -28.17382387311136, "strokeColor": "#ffb13b", "backgroundColor": "#00e676", "width": 145.03034615947246, "height": 28.667631224488975, "seed": 1122464339, "groupIds": [], "roundness": { "type": 2 }, "boundElements": [], "updated": 1731090662600, "link": null, "locked": false, "fontSize": 23.889692687074145, "fontFamily": 1, "text": "Agent Start", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Agent Start", "index": "aF", "frameId": null, "autoResize": true, "lineHeight": 1.2 }, { "type": "rectangle", "version": 2424, "versionNonce": 35039060, "isDeleted": false, "id": "ZTKEryJuyeKfeG4nAI4_e", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1519.3715280015788, "y": -97.22713220408534, "strokeColor": "#03a9f4", 
"backgroundColor": "#03a9f4", "width": 16.103557439457987, "height": 40.2588935986449, "seed": 1188509340, "groupIds": [ "SFux9iuPQVutxOMe0wlW3" ], "roundness": null, "boundElements": [ { "id": "Inol-LWi8GThocPMvSZGX", "type": "arrow" } ], "updated": 1731090662600, "link": null, "locked": false, "index": "aG", "frameId": null }, { "type": "rectangle", "version": 2566, "versionNonce": 741073492, "isDeleted": false, "id": "CckqOoWddQBHuvidbpk4P", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1552.7682727230922, "y": -135.31805500658663, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 16.103557439457962, "height": 80.51778719728983, "seed": 331107492, "groupIds": [ "SFux9iuPQVutxOMe0wlW3" ], "roundness": null, "boundElements": [], "updated": 1731090662600, "link": null, "locked": false, "index": "aH", "frameId": null }, { "type": "rectangle", "version": 2446, "versionNonce": 1386510292, "isDeleted": false, "id": "J8ox4BBdzEg-04DGRL-jh", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1583.2097012862876, "y": -111.1627188473997, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 16.103557439457973, "height": 56.36245103810287, "seed": 1316437916, "groupIds": [ "SFux9iuPQVutxOMe0wlW3" ], "roundness": null, "boundElements": [], "updated": 1731090662600, "link": null, "locked": false, "index": "aI", "frameId": null }, { "type": "rectangle", "version": 2640, "versionNonce": 739318484, "isDeleted": false, "id": "V54vdkZmQeI13AEahvNPV", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1612.013416117277, "y": -128.4099677663797, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 16.103557439457962, "height": 72.46600847756086, "seed": 1025829916, "groupIds": [ "SFux9iuPQVutxOMe0wlW3" ], "roundness": null, "boundElements": 
[ { "id": "leDc6Y5qdU0OH3j90C072", "type": "arrow" }, { "id": "5pptt-wBgcgbxd5LUka3S", "type": "arrow" }, { "id": "iPTphyfJkvqyMptaZS0ge", "type": "arrow" } ], "updated": 1731090668783, "link": null, "locked": false, "index": "aJ", "frameId": null }, { "type": "arrow", "version": 508, "versionNonce": 441289068, "isDeleted": false, "id": "Inol-LWi8GThocPMvSZGX", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1355.5263615107983, "y": -75.4835786217636, "strokeColor": "#ffeb3b", "backgroundColor": "transparent", "width": 160.1111111111113, "height": 0.7777777777777999, "seed": 1433457683, "groupIds": [], "roundness": { "type": 2 }, "boundElements": [], "updated": 1731090662602, "link": null, "locked": false, "startBinding": null, "endBinding": { "elementId": "ZTKEryJuyeKfeG4nAI4_e", "focus": -0.12143341039143216, "gap": 3.73405537966903, "fixedPoint": null }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 160.1111111111113, 0.7777777777777999 ] ], "index": "aK", "frameId": null }, { "type": "line", "version": 2786, "versionNonce": 635507308, "isDeleted": false, "id": "8H60JYjdZgpvCjCSLf54L", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 2069.873935895482, "y": -127.67713376387258, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 99.4022378882407, "height": 88.49954061012453, "seed": 1027783997, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1731090655648, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -53.31426714225508, 16.39277205005967 ], [ -53.9416304949861, 67.82624000521841 ], [ -6.504699453519805, 88.49954061012453 ], [ 44.36723957481235, 67.58742885243016 ], [ 45.4606073932546, 18.174174162751758 ], [ 0, 0 ] ], 
"index": "aL", "frameId": null }, { "type": "text", "version": 1287, "versionNonce": 1131144428, "isDeleted": false, "id": "E_wil3E0D2OO7QeMf_xiu", "fillStyle": "solid", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 2006.4263604935459, "y": -33.70532271267456, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 118.31123365004963, "height": 28.68841075757759, "seed": 754233971, "groupIds": [], "roundness": { "type": 2 }, "boundElements": [], "updated": 1731090655648, "link": null, "locked": false, "fontSize": 23.90700896464799, "fontFamily": 1, "text": "Agent End", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Agent End", "index": "aM", "frameId": null, "autoResize": true, "lineHeight": 1.2 }, { "type": "arrow", "version": 845, "versionNonce": 1896722900, "isDeleted": false, "id": "5pptt-wBgcgbxd5LUka3S", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1632.4152503996868, "y": -79.37246751065243, "strokeColor": "#00e676", "backgroundColor": "#34bbde", "width": 380.1111111111111, "height": 0.8888888888888005, "seed": 603518365, "groupIds": [], "roundness": { "type": 2 }, "boundElements": [], "updated": 1731090666260, "link": null, "locked": false, "startBinding": { "elementId": "V54vdkZmQeI13AEahvNPV", "focus": 0.35400627835344733, "gap": 4.29827684295185, "fixedPoint": null }, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 380.1111111111111, -0.8888888888888005 ] ], "index": "aN", "frameId": null }, { "id": "qzVAVN6sXRxG16958WgBM", "type": "rectangle", "x": 1818.7776164213374, "y": -52.91854707012696, "width": 72.81644178885557, "height": 75.24632678271404, "angle": 0, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": 
[], "strokeSharpness": "sharp", "seed": 1985394028, "version": 1013, "versionNonce": 627575148, "isDeleted": false, "boundElementIds": null, "index": "aW", "frameId": null, "roundness": null, "boundElements": [ { "id": "leDc6Y5qdU0OH3j90C072", "type": "arrow" } ], "updated": 1731090696250, "link": null, "locked": false }, { "id": "z6DGOpk0Ie3KZbNy-UpRO", "type": "rectangle", "x": 1835.6278241812026, "y": -37.7505489614438, "width": 40.35005029973769, "height": 16.04584197060145, "angle": 0, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [], "strokeSharpness": "sharp", "seed": 2036669676, "version": 850, "versionNonce": 1651351148, "isDeleted": false, "boundElementIds": null, "index": "aZ", "frameId": null, "roundness": null, "boundElements": [], "updated": 1731090696250, "link": null, "locked": false }, { "id": "3SSoVnN124G8vYffp5E2K", "type": "rectangle", "x": 1835.6278241812026, "y": -9.064290762712005, "width": 40.35005029973769, "height": 16.04584197060145, "angle": 0, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "groupIds": [], "strokeSharpness": "sharp", "seed": 1326096236, "version": 892, "versionNonce": 1451684076, "isDeleted": false, "boundElementIds": null, "index": "aa", "frameId": null, "roundness": null, "boundElements": [], "updated": 1731090696250, "link": null, "locked": false } ], "appState": { "gridSize": 20, "gridStep": 5, "gridModeEnabled": false, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/api.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "rectangle", "version": 3067, "versionNonce": 2074130101, "isDeleted": false, "id": 
"Ufh5VUA3qmvJowuFyEWz4", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 686.7515859513394, "y": 139.76834100985968, "strokeColor": "#d0d9dd", "backgroundColor": "transparent", "width": 647.3302177689409, "height": 598.4947884819552, "seed": 1218477931, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763581, "link": null, "locked": false }, { "type": "rectangle", "version": 834, "versionNonce": 351468315, "isDeleted": false, "id": "np05zgjgJ1cvT_UcHRO4Q", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 430.44757097692855, "y": 299.40554486158544, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 110.66704644097234, "height": 115.17917209201391, "seed": 112784715, "groupIds": [ "MUxhY1x2GsxZKOmYcaGSB" ], "frameId": null, "roundness": null, "boundElements": [ { "id": "_UxQXW5nkuKOXtaavoJ3q", "type": "arrow" } ], "updated": 1688312763581, "link": null, "locked": false }, { "type": "text", "version": 3122, "versionNonce": 606214165, "isDeleted": false, "id": "GtS3MjQ4UtMxFXJO57RTt", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 436.4176538657067, "y": 346.76486653900156, "strokeColor": "#000000", "backgroundColor": "#4c6ef5", "width": 100.73332977294922, "height": 26, "seed": 1042420715, "groupIds": [ "MUxhY1x2GsxZKOmYcaGSB" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763581, "link": null, "locked": false, "fontSize": 20, "fontFamily": 1, "text": "txtai.java", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "txtai.java", "lineHeight": 1.3, "baseline": 18 }, { "type": "ellipse", "version": 1432, "versionNonce": 951771067, "isDeleted": false, "id": "OwUTZKSXPIR8EHoAXUrAc", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", 
"roughness": 1, "opacity": 100, "angle": 0, "x": 748.5117575144184, "y": 353.37666266846827, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 27.50940800286237, "height": 23.432583857740315, "seed": 616058027, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [ { "id": "wtQWWSoRs-w6Dtmax-Hb5", "type": "arrow" }, { "id": "_UxQXW5nkuKOXtaavoJ3q", "type": "arrow" } ], "updated": 1688312763581, "link": null, "locked": false }, { "type": "ellipse", "version": 1403, "versionNonce": 1823493493, "isDeleted": false, "id": "qfK-ulSZh44Bl6KC9Ii6F", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 746.9882589503762, "y": 513.0271342544165, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 27.50940800286237, "height": 23.432583857740315, "seed": 605129061, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [ { "id": "Iju4jXbzOeX44JCepZN3j", "type": "arrow" } ], "updated": 1688312763582, "link": null, "locked": false }, { "type": "ellipse", "version": 1805, "versionNonce": 794636379, "isDeleted": false, "id": "xcu0cSh50RePPTdayuKwa", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 744.9160350012032, "y": 664.9580577695606, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 27.50940800286237, "height": 23.432583857740315, "seed": 726590347, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [ { "id": "QyCYDQ1akZks7nob5gQcb", "type": "arrow" } ], "updated": 1688312763582, "link": null, "locked": false }, { "type": "line", "version": 1891, "versionNonce": 1687213781, "isDeleted": false, "id": "M3a040F3DuK1icxv47TKI", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 888.8977161326809, "y": 311.8359502397928, "strokeColor": "#495057", "backgroundColor": "#ced4da", "width": 291.907455568772, 
"height": 237.38924779486172, "seed": 2113350923, "groupIds": [ "OA8MrA83v3EseguJzvKpu" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763582, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 94.12373118273203, -0.35823119069767984 ], [ 119.85817357723676, 26.483994997226382 ], [ 290.1826277821376, 26.874356201934916 ], [ 291.23331617134846, 236.70336432335256 ], [ -0.6741393974235308, 237.03101660416405 ], [ 0, 0 ] ] }, { "type": "ellipse", "version": 1544, "versionNonce": 179072251, "isDeleted": false, "id": "09DIdeixkMWbCga6IxuMh", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 60, "angle": 0, "x": 896.3667273815456, "y": 321.85704123732336, "strokeColor": "#000000", "backgroundColor": "#ced4da", "width": 11.242087394797462, "height": 9.502075937111973, "seed": 1749242795, "groupIds": [ "o22U873S7MmF-aAZOMXrC", "OA8MrA83v3EseguJzvKpu" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763582, "link": null, "locked": false }, { "type": "ellipse", "version": 1579, "versionNonce": 374944821, "isDeleted": false, "id": "HvI6J1Tn27hWbdbb3G2PZ", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 60, "angle": 0, "x": 911.3550504277948, "y": 321.3688660254814, "strokeColor": "#000000", "backgroundColor": "#ced4da", "width": 11.242087394797462, "height": 9.502075937111973, "seed": 261203531, "groupIds": [ "o22U873S7MmF-aAZOMXrC", "OA8MrA83v3EseguJzvKpu" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763582, "link": null, "locked": false }, { "type": "ellipse", "version": 1632, "versionNonce": 2116752795, "isDeleted": false, "id": "2zH7sdihf_U-DKWGnQEP-", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 60, "angle": 0, "x": 
927.1911747446115, "y": 320.8462868771415, "strokeColor": "#000000", "backgroundColor": "#ced4da", "width": 11.242087394797462, "height": 9.502075937111973, "seed": 1790747883, "groupIds": [ "o22U873S7MmF-aAZOMXrC", "OA8MrA83v3EseguJzvKpu" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763582, "link": null, "locked": false }, { "type": "ellipse", "version": 1672, "versionNonce": 502195605, "isDeleted": false, "id": "XUyFnqAl7kQAnMAoB7Cgx", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 935.2057099043777, "y": 359.9947885349467, "strokeColor": "#343a40", "backgroundColor": "#fefefe", "width": 189.85440623862308, "height": 166.25236270503373, "seed": 827248523, "groupIds": [ "OA8MrA83v3EseguJzvKpu" ], "frameId": null, "roundness": null, "boundElements": [ { "id": "wtQWWSoRs-w6Dtmax-Hb5", "type": "arrow" }, { "id": "D904AMBJU1YvbvQMXyluo", "type": "arrow" } ], "updated": 1688312763582, "link": null, "locked": false }, { "type": "text", "version": 1200, "versionNonce": 688630331, "isDeleted": false, "id": "oYWPL1hYAJT2RGX0H8kwp", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 988.3296817129515, "y": 416.9111931412608, "strokeColor": "#343a40", "backgroundColor": "#fefefe", "width": 92.69999694824219, "height": 72, "seed": 857102411, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763582, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "API \nservice", "textAlign": "center", "verticalAlign": "top", "containerId": null, "originalText": "API \nservice", "lineHeight": 1.2857142857142858, "baseline": 61 }, { "type": "arrow", "version": 3749, "versionNonce": 2122803957, "isDeleted": false, "id": "wtQWWSoRs-w6Dtmax-Hb5", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 948.4692485198284, 
"y": 398.09023585204204, "strokeColor": "#03a9f4", "backgroundColor": "#fefefe", "width": 169.2246874884429, "height": 34.21193675692433, "seed": 1918002661, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763582, "link": null, "locked": false, "startBinding": { "elementId": "XUyFnqAl7kQAnMAoB7Cgx", "focus": 0.5294260456922872, "gap": 1.5145847198102445 }, "endBinding": { "elementId": "OwUTZKSXPIR8EHoAXUrAc", "focus": -0.11436214558425631, "gap": 3.2793683958148367 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -41.176703496995515, -0.5128540090383922 ], [ -41.24791785008779, -33.26749312785438 ], [ -169.2246874884429, -34.21193675692433 ] ] }, { "type": "line", "version": 5941, "versionNonce": 2003095061, "isDeleted": false, "id": "_btUlJm1SELpFH74lhe5u", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1242.776436154969, "y": 396.44332526359824, "strokeColor": "#5f3dc4", "backgroundColor": "#7950f2", "width": 76.99810389727404, "height": 99.37827711605759, "seed": 576008901, "groupIds": [ "nC_wnGn6V9Cg51lj82An8", "xEHH4p1jeHBH4PUaRrIni", "JG_8M8qPMZ6QPLNYCIjBV" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312766628, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 0.25390020469271946, 75.10956320658954 ], [ 0.011881933539363843, 83.66046081728857 ], [ 3.9655726433067358, 87.35519793732486 ], [ 17.734103263694266, 90.48214189738948 ], [ 41.00682018880677, 91.45582553513545 ], [ 63.24236222825351, 89.90119697892055 ], [ 75.05626943052894, 86.1833309042851 ], [ 76.72246117951802, 83.04913080160064 ], [ 76.95647177899504, 76.16486549140681 ], [ 76.77280066894052, 6.301273122914296 ], [ 76.35874703071867, -0.29954911620753677 ], [ 71.41469198102897, 
-3.9887920872726426 ], [ 61.00356715097897, -6.125406402086433 ], [ 37.27802033642641, -7.922451580922152 ], [ 18.256149021768273, -6.850869494392456 ], [ 3.2955764171578545, -3.2161938062295943 ], [ -0.04163211827899763, -0.0451306156134037 ], [ 0, 0 ] ] }, { "type": "ellipse", "version": 6700, "versionNonce": 950438331, "isDeleted": false, "id": "afFg1c-VqjZ6VB_2cnCgV", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1242.2827684130289, "y": 387.6723986399642, "strokeColor": "#5f3dc4", "backgroundColor": "#7950f2", "width": 76.50422544892463, "height": 15.472404032124233, "seed": 1054869515, "groupIds": [ "nC_wnGn6V9Cg51lj82An8", "xEHH4p1jeHBH4PUaRrIni", "JG_8M8qPMZ6QPLNYCIjBV" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312766628, "link": null, "locked": false }, { "type": "text", "version": 1907, "versionNonce": 1915503477, "isDeleted": false, "id": "ymMTsrfERD1IWhRqaTh_I", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1261.2853467746677, "y": 422.35935481481545, "strokeColor": "#5f3dc4", "backgroundColor": "#7950f2", "width": 17.116666793823242, "height": 36, "seed": 621580837, "groupIds": [ "xEHH4p1jeHBH4PUaRrIni", "JG_8M8qPMZ6QPLNYCIjBV" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [ { "id": "D904AMBJU1YvbvQMXyluo", "type": "arrow" } ], "updated": 1688312766628, "link": null, "locked": false, "fontSize": 29.219434366479078, "fontFamily": 3, "text": "{", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "{", "lineHeight": 1.2320567040578883, "baseline": 29 }, { "type": "text", "version": 1877, "versionNonce": 1212389979, "isDeleted": false, "id": "j26zNzxtmoRXOdiX0ecde", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1283.2962201204132, "y": 422.8360647867345, "strokeColor": 
"#5f3dc4", "backgroundColor": "#7950f2", "width": 17.21666717529297, "height": 35.94436319458793, "seed": 1834536619, "groupIds": [ "xEHH4p1jeHBH4PUaRrIni", "JG_8M8qPMZ6QPLNYCIjBV" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312766628, "link": null, "locked": false, "fontSize": 29.384515916572173, "fontFamily": 3, "text": "}", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "}", "lineHeight": 1.2232416316348487, "baseline": 29 }, { "type": "text", "version": 1091, "versionNonce": 52301013, "isDeleted": false, "id": "RVGdpt8fu7DFpzBrOmhVl", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1257.5750007629395, "y": 489.46141030254137, "strokeColor": "#5f3dc4", "backgroundColor": "#7950f2", "width": 46.849998474121094, "height": 24, "seed": 1281426821, "groupIds": [ "JG_8M8qPMZ6QPLNYCIjBV" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312766628, "link": null, "locked": false, "fontSize": 18.16360832146819, "fontFamily": 1, "text": "txtai", "textAlign": "center", "verticalAlign": "top", "containerId": null, "originalText": "txtai", "lineHeight": 1.3213233612637187, "baseline": 17 }, { "type": "arrow", "version": 835, "versionNonce": 1915286293, "isDeleted": false, "id": "D904AMBJU1YvbvQMXyluo", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1126.546873152304, "y": 442.58712580573655, "strokeColor": "#495057", "backgroundColor": "transparent", "width": 118.78773358450212, "height": 0.33064816023818366, "seed": 1187727819, "groupIds": [], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312763582, "link": null, "locked": false, "startBinding": { "elementId": "XUyFnqAl7kQAnMAoB7Cgx", "focus": -0.009650513305614256, "gap": 1.4886753513908815 }, "endBinding": { "elementId": "ymMTsrfERD1IWhRqaTh_I", "focus": 
-0.14572391577235705, "gap": 15.950740037861578 }, "lastCommittedPoint": null, "startArrowhead": "arrow", "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 118.78773358450212, 0.33064816023818366 ] ] }, { "type": "rectangle", "version": 906, "versionNonce": 1466119355, "isDeleted": false, "id": "y5QaZJpFTfZABVw2mHLpE", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 429.6664767795138, "y": 461.41041395399304, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 114.10871875077035, "height": 118.76116853994468, "seed": 1524581995, "groupIds": [ "nGNW7tJf4LAc_8aa1gg6P" ], "frameId": null, "roundness": null, "boundElements": [ { "id": "Iju4jXbzOeX44JCepZN3j", "type": "arrow" } ], "updated": 1688312763582, "link": null, "locked": false }, { "type": "text", "version": 3201, "versionNonce": 723765365, "isDeleted": false, "id": "v_UF3QajlZu6wJg9yMLih", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 449.4691219935195, "y": 511.23981160653864, "strokeColor": "#000000", "backgroundColor": "#00e676", "width": 71.91666412353516, "height": 25.999999999999996, "seed": 396443915, "groupIds": [ "nGNW7tJf4LAc_8aa1gg6P" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763582, "link": null, "locked": false, "fontSize": 19.322657964237056, "fontFamily": 1, "text": "txtai.js", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "txtai.js", "lineHeight": 1.3455705756486278, "baseline": 18 }, { "type": "rectangle", "version": 1005, "versionNonce": 1613948251, "isDeleted": false, "id": "bOQD-uSjLkWjJgytmZ4oa", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 431.6664767795138, "y": 619.410413953993, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 110.66704644097234, "height": 115.17917209201391, "seed": 
347667589, "groupIds": [ "efl0w5KGoAkFRTYqTi7-b" ], "frameId": null, "roundness": null, "boundElements": [ { "id": "QyCYDQ1akZks7nob5gQcb", "type": "arrow" } ], "updated": 1688312763582, "link": null, "locked": false }, { "type": "text", "version": 3264, "versionNonce": 484241877, "isDeleted": false, "id": "XLzbZJT2I8ZRyPvDGlp6q", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 448.63655966829197, "y": 667.7697356314092, "strokeColor": "#000000", "backgroundColor": "#ff7043", "width": 76.5, "height": 26, "seed": 869344229, "groupIds": [ "efl0w5KGoAkFRTYqTi7-b" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763582, "link": null, "locked": false, "fontSize": 20, "fontFamily": 1, "text": "txtai.rs", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "txtai.rs", "lineHeight": 1.3, "baseline": 18 }, { "type": "rectangle", "version": 1804, "versionNonce": 1627619835, "isDeleted": false, "id": "NDWbJgM53uVck-LSQzD-0", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 428.6664767795138, "y": 141.41041395399304, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 110.66704644097234, "height": 115.17917209201391, "seed": 1934943141, "groupIds": [ "6yaFhpagshturM5EK_Hay" ], "frameId": null, "roundness": null, "boundElements": [ { "id": "095ylTvsupuqCzyoolf44", "type": "arrow" } ], "updated": 1688312763582, "link": null, "locked": false }, { "type": "text", "version": 3123, "versionNonce": 1147793205, "isDeleted": false, "id": "DnwBnStWE5iZA-9cemLzh", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 445.63655966829197, "y": 189.76973563140916, "strokeColor": "#000000", "backgroundColor": "#ffeb3b", "width": 78.13333129882812, "height": 52, "seed": 547563269, "groupIds": [ "6yaFhpagshturM5EK_Hay" ], "frameId": 
null, "roundness": null, "boundElements": [], "updated": 1688312763583, "link": null, "locked": false, "fontSize": 20, "fontFamily": 1, "text": "txtai.go\n", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "txtai.go\n", "lineHeight": 1.3, "baseline": 44 }, { "type": "ellipse", "version": 1434, "versionNonce": 47680155, "isDeleted": false, "id": "zt72wyfizv17KOzo8EnhU", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 748.2452959985687, "y": 186.2837080711298, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 27.50940800286237, "height": 23.432583857740315, "seed": 1191665131, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [ { "id": "wtQWWSoRs-w6Dtmax-Hb5", "type": "arrow" }, { "id": "095ylTvsupuqCzyoolf44", "type": "arrow" } ], "updated": 1688312763583, "link": null, "locked": false }, { "type": "line", "version": 198, "versionNonce": 1613774997, "isDeleted": false, "id": "l2-_RHbfroK1-yNNP9pWJ", "fillStyle": "cross-hatch", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 968.0163994686301, "y": 373.9539804419734, "strokeColor": "#ffeb3b", "backgroundColor": "#4c6ef5", "width": 186.00110275930913, "height": 179.95398042998312, "seed": 767528715, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763583, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -1.0163994686301048, -179.9539804299831 ], [ -186.0011027593091, -179.21342907018897 ] ] }, { "type": "line", "version": 251, "versionNonce": 711032635, "isDeleted": false, "id": "7sDk3itWmD2GE1P-EYplQ", "fillStyle": "cross-hatch", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 973.9999999999992, "y": 513.9999999998802, "strokeColor": 
"#ff7043", "backgroundColor": "#4c6ef5", "width": 197.9999999999992, "height": 166.00000000011983, "seed": 965983243, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763583, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -1.010204081632649, 166.00000000011983 ], [ -197.9999999999992, 166.00000000011983 ] ] }, { "type": "arrow", "version": 121, "versionNonce": 1172661749, "isDeleted": false, "id": "095ylTvsupuqCzyoolf44", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 545.2882783422247, "y": 201.14030329253615, "strokeColor": "#ffeb3b", "backgroundColor": "#ff7043", "width": 198.68164012474733, "height": 0.2787977820074161, "seed": 4899205, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763583, "link": null, "locked": false, "startBinding": { "elementId": "NDWbJgM53uVck-LSQzD-0", "focus": 0.038606073098519975, "gap": 5.954755121738572 }, "endBinding": { "elementId": "zt72wyfizv17KOzo8EnhU", "focus": -0.242073274518529, "gap": 4.559216593972247 }, "lastCommittedPoint": null, "startArrowhead": "arrow", "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 198.68164012474733, -0.2787977820074161 ] ] }, { "type": "arrow", "version": 67, "versionNonce": 33808347, "isDeleted": false, "id": "_UxQXW5nkuKOXtaavoJ3q", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 545.2448106156662, "y": 362.4655547515135, "strokeColor": "#03a9f4", "backgroundColor": "#ff7043", "width": 195.75519089085594, "height": 1.5344470050185919, "seed": 180453803, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763583, "link": null, "locked": false, "startBinding": { "elementId": "np05zgjgJ1cvT_UcHRO4Q", "focus": 
0.08624654954364566, "gap": 4.130193197765266 }, "endBinding": { "elementId": "OwUTZKSXPIR8EHoAXUrAc", "focus": 0.07905353643606809, "gap": 7.545863589033182 }, "lastCommittedPoint": null, "startArrowhead": "arrow", "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 195.75519089085594, 1.5344470050185919 ] ] }, { "type": "arrow", "version": 146, "versionNonce": 2022629205, "isDeleted": false, "id": "Iju4jXbzOeX44JCepZN3j", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 547.8284140902045, "y": 520.9148281821533, "strokeColor": "#00e676", "backgroundColor": "#ff7043", "width": 195.16671561089288, "height": 3.1847497390683657, "seed": 1277347109, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763583, "link": null, "locked": false, "startBinding": { "elementId": "y5QaZJpFTfZABVw2mHLpE", "focus": -0.01448029053565506, "gap": 4.053218559920424 }, "endBinding": { "elementId": "qfK-ulSZh44Bl6KC9Ii6F", "focus": 0.03022905067490028, "gap": 4.0079533720564715 }, "lastCommittedPoint": null, "startArrowhead": "arrow", "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 195.16671561089288, 3.1847497390683657 ] ] }, { "type": "arrow", "version": 551, "versionNonce": 1137781883, "isDeleted": false, "id": "QyCYDQ1akZks7nob5gQcb", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 548.0000000000001, "y": 679.9183352218894, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 190.09617369448893, "height": 1.4298124428980827, "seed": 1602091877, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763583, "link": null, "locked": false, "startBinding": { "elementId": "bOQD-uSjLkWjJgytmZ4oa", "focus": 0.05822088981723922, "gap": 5.666476779514028 }, "endBinding": { "elementId": "xcu0cSh50RePPTdayuKwa", "focus": -0.14162811640911155, "gap": 6.9174857452546 }, 
"lastCommittedPoint": null, "startArrowhead": "arrow", "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 190.09617369448893, -1.4298124428980827 ] ] }, { "type": "line", "version": 139, "versionNonce": 2122460341, "isDeleted": false, "id": "HKLfE2F8l78a_hZEKhmnY", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 956, "y": 497, "strokeColor": "#00e676", "backgroundColor": "#ced4da", "width": 180, "height": 29, "seed": 597910187, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312763583, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -45, 0 ], [ -44, 29 ], [ -180, 29 ] ] } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/architecture.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "rectangle", "version": 4858, "versionNonce": 83494474, "isDeleted": false, "id": "1iE4X4trkcbtJpCDC5ajY", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 27.525089187647836, "y": -558.1200877100962, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 227.65938859429855, "height": 57.830603028426594, "seed": 1321467630, "groupIds": [ "MnVd9k06uFuFRzqXmnEvR", "NFKrTdLl_zy4zOvDGWVBD" ], "frameId": null, "roundness": { "type": 3 }, "boundElements": [ { "id": "qROQdhT3FWGzgS0JOQXlg", "type": "arrow" } ], "updated": 1691411991785, "link": null, "locked": false }, { "type": "text", "version": 5224, "versionNonce": 1682430806, "isDeleted": false, "id": "ZiUda1q4lvcNM9G2Or_d7", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, 
"x": 39.103253497881326, "y": -548.6024245205953, "strokeColor": "#1e1e1e", "backgroundColor": "#343a40", "width": 210.71665954589844, "height": 40, "seed": 2050033394, "groupIds": [ "NFKrTdLl_zy4zOvDGWVBD" ], "frameId": null, "roundness": null, "boundElements": [ { "id": "qROQdhT3FWGzgS0JOQXlg", "type": "arrow" } ], "updated": 1691411991785, "link": null, "locked": false, "fontSize": 16, "fontFamily": 1, "text": "- Semantic, Keyword, Hybrid\n- Search with SQL", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "- Semantic, Keyword, Hybrid\n- Search with SQL", "lineHeight": 1.25, "baseline": 35 }, { "type": "rectangle", "version": 2527, "versionNonce": 2136048394, "isDeleted": false, "id": "i9TRMGNTF_ckjPiI7BrdL", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 358.6031681551546, "y": -598.4332496538902, "strokeColor": "#868e96", "backgroundColor": "#e9ecef", "width": 348.08489527005264, "height": 200.0037909410919, "seed": 1867912238, "groupIds": [ "cNL6-_r-I9wctWvuw8DqS" ], "frameId": null, "roundness": { "type": 3 }, "boundElements": [ { "id": "viSfAjT-8KRTYvsrunjfn", "type": "arrow" }, { "id": "CJaOip-NXs_WGJ3YnzrzX", "type": "arrow" }, { "id": "sf5i6bxPYE2ykeh2wWm8m", "type": "arrow" } ], "updated": 1691411996548, "link": null, "locked": false }, { "type": "rectangle", "version": 1915, "versionNonce": 211681430, "isDeleted": false, "id": "F1r0Plm7hDaeNndIwZAuf", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 368.61999967371725, "y": -538.0801566026939, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 160.26930429700263, "height": 62.605196991016655, "seed": 436869746, "groupIds": [ "cNL6-_r-I9wctWvuw8DqS" ], "frameId": null, "roundness": { "type": 3 }, "boundElements": [ { "type": "text", "id": "gw7zjxlDiAjodT1x1_fv_" }, { "id": "qROQdhT3FWGzgS0JOQXlg", "type": "arrow" }, { "id": 
"viSfAjT-8KRTYvsrunjfn", "type": "arrow" } ], "updated": 1691411991785, "link": null, "locked": false }, { "type": "text", "version": 1897, "versionNonce": 347110346, "isDeleted": false, "id": "gw7zjxlDiAjodT1x1_fv_", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 406.8879869357439, "y": -522.695916613224, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "width": 83.73332977294922, "height": 31.836717012076832, "seed": 1023300530, "groupIds": [ "cNL6-_r-I9wctWvuw8DqS" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691411991785, "link": null, "locked": false, "fontSize": 25.469373609661467, "fontFamily": 1, "text": "Sparse", "textAlign": "center", "verticalAlign": "middle", "containerId": "F1r0Plm7hDaeNndIwZAuf", "originalText": "Sparse", "lineHeight": 1.25, "baseline": 22 }, { "type": "rectangle", "version": 2272, "versionNonce": 696090070, "isDeleted": false, "id": "55xWndBPWTiyUuLIS_J8b", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 369.8721036135376, "y": -469.46654385239594, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 157.76509641736197, "height": 57.59678123173532, "seed": 1317384558, "groupIds": [ "cNL6-_r-I9wctWvuw8DqS" ], "frameId": null, "roundness": { "type": 3 }, "boundElements": [ { "type": "text", "id": "Qaeh-leNa7YgcPsQbfBcq" }, { "id": "qROQdhT3FWGzgS0JOQXlg", "type": "arrow" }, { "id": "viSfAjT-8KRTYvsrunjfn", "type": "arrow" }, { "id": "CJaOip-NXs_WGJ3YnzrzX", "type": "arrow" } ], "updated": 1691411991785, "link": null, "locked": false }, { "type": "text", "version": 2210, "versionNonce": 1434612362, "isDeleted": false, "id": "Qaeh-leNa7YgcPsQbfBcq", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 412.012986935744, "y": -456.59111621253714, "strokeColor": "#1e1e1e", "backgroundColor": 
"transparent", "width": 73.48332977294922, "height": 31.84592595201767, "seed": 1661259762, "groupIds": [ "cNL6-_r-I9wctWvuw8DqS" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691411991785, "link": null, "locked": false, "fontSize": 25.476740761614135, "fontFamily": 1, "text": "Dense", "textAlign": "center", "verticalAlign": "middle", "containerId": "55xWndBPWTiyUuLIS_J8b", "originalText": "Dense", "lineHeight": 1.25, "baseline": 22 }, { "type": "rectangle", "version": 2356, "versionNonce": 268129046, "isDeleted": false, "id": "KSf88Asx9gne1WsRdsx6-", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 535.1498236698217, "y": -469.46654385239594, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 156.51299247754164, "height": 57.59678123173532, "seed": 1016194418, "groupIds": [ "cNL6-_r-I9wctWvuw8DqS" ], "frameId": null, "roundness": { "type": 3 }, "boundElements": [ { "type": "text", "id": "7RaoYDeBRuz16CJo7jaVO" }, { "id": "CJaOip-NXs_WGJ3YnzrzX", "type": "arrow" } ], "updated": 1691411991785, "link": null, "locked": false }, { "type": "text", "version": 2343, "versionNonce": 2120716618, "isDeleted": false, "id": "7RaoYDeBRuz16CJo7jaVO", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 576.8063214344714, "y": -456.5934785058263, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "width": 73.19999694824219, "height": 31.850650538596007, "seed": 1307924782, "groupIds": [ "cNL6-_r-I9wctWvuw8DqS" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691411991785, "link": null, "locked": false, "fontSize": 25.480520430876805, "fontFamily": 1, "text": "Graph", "textAlign": "center", "verticalAlign": "middle", "containerId": "KSf88Asx9gne1WsRdsx6-", "originalText": "Graph", "lineHeight": 1.25, "baseline": 22 }, { "type": "text", "version": 1738, "versionNonce": 713850966, 
"isDeleted": false, "id": "gAcYCRjD6VZUsDZce-atm", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 439.98992424347625, "y": -595.9290417742495, "strokeColor": "#1e1e1e", "backgroundColor": "#ffeb3b", "width": 183.39999389648438, "height": 43.823637893711656, "seed": 1369566386, "groupIds": [ "cNL6-_r-I9wctWvuw8DqS" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691411991786, "link": null, "locked": false, "fontSize": 35.058910314969324, "fontFamily": 1, "text": "Embeddings", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Embeddings", "lineHeight": 1.25, "baseline": 31 }, { "type": "rectangle", "version": 2094, "versionNonce": 704590410, "isDeleted": false, "id": "UEE-Z2YT9Y5YvVCJefbd0", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 533.8977197300013, "y": -535.8280526628736, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 160.26930429700263, "height": 60.10098911137599, "seed": 1015954930, "groupIds": [ "cNL6-_r-I9wctWvuw8DqS" ], "frameId": null, "roundness": { "type": 3 }, "boundElements": [ { "type": "text", "id": "Y1uf_NH7fI6arCSGiICiX" }, { "id": "CJaOip-NXs_WGJ3YnzrzX", "type": "arrow" } ], "updated": 1691412007529, "link": null, "locked": false }, { "type": "text", "version": 2084, "versionNonce": 1292240278, "isDeleted": false, "id": "Y1uf_NH7fI6arCSGiICiX", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 551.0823711155632, "y": -521.695916613224, "strokeColor": "#1e1e1e", "backgroundColor": "transparent", "width": 125.9000015258789, "height": 31.836717012076832, "seed": 867545010, "groupIds": [ "cNL6-_r-I9wctWvuw8DqS" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691411991786, "link": null, "locked": false, "fontSize": 25.469373609661467, "fontFamily": 1, 
"text": "Database", "textAlign": "center", "verticalAlign": "middle", "containerId": "UEE-Z2YT9Y5YvVCJefbd0", "originalText": "Database", "lineHeight": 1.25, "baseline": 22 }, { "type": "rectangle", "version": 4488, "versionNonce": 1920419530, "isDeleted": false, "id": "uV8iktEBp-zvkGceO0YNj", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 28.400291679657585, "y": -482.37846781152234, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 226.65938859429855, "height": 53.830603028426594, "seed": 511161586, "groupIds": [ "lsnRF8t7e-8bA4k7ZMtQs", "XOEGmXNmM0vVhFi_aLf0h" ], "frameId": null, "roundness": { "type": 3 }, "boundElements": [ { "id": "viSfAjT-8KRTYvsrunjfn", "type": "arrow" } ], "updated": 1691411991786, "link": null, "locked": false }, { "type": "text", "version": 4865, "versionNonce": 144499414, "isDeleted": false, "id": "VU0ZFGV4tB5pR_8AMPehi", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 38.17389738827433, "y": -477.32443222094344, "strokeColor": "#1e1e1e", "backgroundColor": "#343a40", "width": 190.39999389648438, "height": 40, "seed": 660096690, "groupIds": [ "XOEGmXNmM0vVhFi_aLf0h" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691411991786, "link": null, "locked": false, "fontSize": 16, "fontFamily": 1, "text": "- Topics + Relationships\n- Multimodal Indexes", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "- Topics + Relationships\n- Multimodal Indexes", "lineHeight": 1.25, "baseline": 35 }, { "type": "arrow", "version": 3140, "versionNonce": 1465895050, "isDeleted": false, "id": "qROQdhT3FWGzgS0JOQXlg", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 357.5459819116054, "y": -474.380420640035, "strokeColor": "#868e96", "backgroundColor": "#e9ecef", "width": 101.91733646013165, 
"height": 51.182713481535075, "seed": 1412373166, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691412001500, "link": null, "locked": false, "startBinding": { "elementId": "55xWndBPWTiyUuLIS_J8b", "focus": -0.17593583244727756, "gap": 13.269493633705139 }, "endBinding": { "elementId": "ZiUda1q4lvcNM9G2Or_d7", "focus": -0.7240167179756973, "gap": 5.808732407693981 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ -101.91733646013165, -51.182713481535075 ] ] }, { "type": "arrow", "version": 2221, "versionNonce": 297446922, "isDeleted": false, "id": "viSfAjT-8KRTYvsrunjfn", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 355.69072075221885, "y": -473.6934438187176, "strokeColor": "#868e96", "backgroundColor": "#e9ecef", "width": 99.11528692741001, "height": 28.941684185339795, "seed": 769791342, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691412005709, "link": null, "locked": false, "startBinding": { "elementId": "55xWndBPWTiyUuLIS_J8b", "focus": 1.1614221413579826, "gap": 14.797915501333051 }, "endBinding": { "elementId": "uV8iktEBp-zvkGceO0YNj", "focus": 0.737345008582056, "gap": 1.5157535508527076 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ -99.11528692741001, 28.941684185339795 ] ] }, { "type": "arrow", "version": 5939, "versionNonce": 1648005846, "isDeleted": false, "id": "CJaOip-NXs_WGJ3YnzrzX", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 780.2644916835155, "y": -444.3627069906215, "strokeColor": "#868e96", "backgroundColor": "#e9ecef", "width": 71.1891690063859, "height": 28.06038115974593, "seed": 1923467222, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691411901454, "link": null, "locked": 
false, "startBinding": { "elementId": "SKqPLoIsGXgDL5mhOTqLv", "focus": -0.634641820715333, "gap": 5.063962132776453 }, "endBinding": { "elementId": "UEE-Z2YT9Y5YvVCJefbd0", "focus": -0.06665280707924877, "gap": 15.270023644140622 }, "lastCommittedPoint": null, "startArrowhead": "arrow", "endArrowhead": null, "points": [ [ 0, 0 ], [ -71.1891690063859, -28.06038115974593 ] ] }, { "type": "rectangle", "version": 3429, "versionNonce": 348832458, "isDeleted": false, "id": "7UwPwHCm_ZiMJb1o0x5A8", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 783.2131987136702, "y": -574.1983152922421, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 272.0848952700526, "height": 73.0037909410919, "seed": 155582550, "groupIds": [ "kgkwY28q5i7W008rJksMR", "TqYTD07S3yHeAVJOG3yyi" ], "frameId": null, "roundness": { "type": 3 }, "boundElements": [], "updated": 1691411901455, "link": null, "locked": false }, { "type": "text", "version": 1591, "versionNonce": 72296150, "isDeleted": false, "id": "fFWZLl9F8IwK5QhXlAssA", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 792.6342330724652, "y": -569.5895346298003, "strokeColor": "#1e1e1e", "backgroundColor": "#343a40", "width": 77.1500015258789, "height": 20, "seed": 543556438, "groupIds": [ "TqYTD07S3yHeAVJOG3yyi" ], "frameId": null, "roundness": null, "boundElements": [ { "id": "CJaOip-NXs_WGJ3YnzrzX", "type": "arrow" } ], "updated": 1691411901455, "link": null, "locked": false, "fontSize": 16, "fontFamily": 1, "text": " Prompt >", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": " Prompt >", "lineHeight": 1.25, "baseline": 15 }, { "type": "text", "version": 1341, "versionNonce": 450634262, "isDeleted": false, "id": "FgFcgIY3s9iRdPKwN4t_r", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 
785.1996772966362, "y": -548.1590294764883, "strokeColor": "#1e1e1e", "backgroundColor": "#343a40", "width": 261.5, "height": 41.41205162166277, "seed": 423650058, "groupIds": [ "TqYTD07S3yHeAVJOG3yyi" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691412007529, "link": null, "locked": false, "fontSize": 16.56482064866511, "fontFamily": 1, "text": " Answer the following question\n using the context below", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": " Answer the following question\n using the context below", "lineHeight": 1.25, "baseline": 35 }, { "type": "rectangle", "version": 3467, "versionNonce": 1420811286, "isDeleted": false, "id": "_8pPaWBfdkvpd8B9PQ7wI", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 784.2131987136702, "y": -487.1983152922421, "strokeColor": "#6741d9", "backgroundColor": "#6741d9", "width": 272.0848952700526, "height": 73.0037909410919, "seed": 2066566410, "groupIds": [ "FtTm_MWbxe67f2n84saes", "3PAnqhix8z46z6JRJjAO7" ], "frameId": null, "roundness": { "type": 3 }, "boundElements": [ { "id": "CJaOip-NXs_WGJ3YnzrzX", "type": "arrow" } ], "updated": 1691411901455, "link": null, "locked": false }, { "type": "text", "version": 1602, "versionNonce": 1943803978, "isDeleted": false, "id": "apAxY8HSw8w2lfLe1pCXP", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 792.7130504007473, "y": -482.33339890257247, "strokeColor": "#1e1e1e", "backgroundColor": "#6741d9", "width": 75.73332977294922, "height": 20, "seed": 1870973578, "groupIds": [ "3PAnqhix8z46z6JRJjAO7" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691411901455, "link": null, "locked": false, "fontSize": 16, "fontFamily": 1, "text": " Search >", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": " Search >", "lineHeight": 1.25, "baseline": 15 
}, { "type": "text", "version": 1069, "versionNonce": 1506273622, "isDeleted": false, "id": "SKqPLoIsGXgDL5mhOTqLv", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 785.3284538162922, "y": -458.46265045437633, "strokeColor": "#1e1e1e", "backgroundColor": "#6741d9", "width": 251.21665954589844, "height": 41.82578128069407, "seed": 1533237462, "groupIds": [ "3PAnqhix8z46z6JRJjAO7" ], "frameId": null, "roundness": null, "boundElements": [ { "id": "CJaOip-NXs_WGJ3YnzrzX", "type": "arrow" } ], "updated": 1691411901455, "link": null, "locked": false, "fontSize": 16.730312512277628, "fontFamily": 1, "text": " SELECT ... FROM txtai\n WHERE SIMILAR('question')", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": " SELECT ... FROM txtai\n WHERE SIMILAR('question')", "lineHeight": 1.25, "baseline": 35 }, { "type": "arrow", "version": 5999, "versionNonce": 1002058634, "isDeleted": false, "id": "sf5i6bxPYE2ykeh2wWm8m", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 780.8522503409429, "y": -537.8502934180265, "strokeColor": "#868e96", "backgroundColor": "#e9ecef", "width": 72.32385186561339, "height": 64.0647548382936, "seed": 1598499018, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691412007529, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": "arrow", "endArrowhead": null, "points": [ [ 0, 0 ], [ -72.32385186561339, 64.0647548382936 ] ] } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/cloud.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "rectangle", "version": 3062, "versionNonce": 456539957, 
"isDeleted": false, "id": "Ufh5VUA3qmvJowuFyEWz4", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 570.7515859513394, "y": 59.768341009859626, "strokeColor": "#d0d9dd", "backgroundColor": "transparent", "width": 719.330217768941, "height": 100.49478848195537, "seed": 2109972213, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312603126, "link": null, "locked": false }, { "type": "rectangle", "version": 1798, "versionNonce": 1560857173, "isDeleted": false, "id": "NDWbJgM53uVck-LSQzD-0", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 740.6664767795138, "y": 67.92614920483035, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 85, "height": 84.17917209201391, "seed": 1782446165, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [ { "type": "text", "id": "0VztU7lb7T9I5zoL2LjLt" } ], "updated": 1688312605190, "link": null, "locked": false }, { "type": "text", "version": 385, "versionNonce": 1280754907, "isDeleted": false, "id": "0VztU7lb7T9I5zoL2LjLt", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 745.6664767795138, "y": 99.51573525083731, "strokeColor": "#000000", "backgroundColor": "#7950f2", "width": 75, "height": 21, "seed": 2095457717, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312605190, "link": null, "locked": false, "fontSize": 16, "fontFamily": 1, "text": "Container", "textAlign": "center", "verticalAlign": "middle", "containerId": "NDWbJgM53uVck-LSQzD-0", "originalText": "Container", "lineHeight": 1.3125, "baseline": 15 }, { "type": "rectangle", "version": 1874, "versionNonce": 847689653, "isDeleted": false, "id": "rr3YJsAyrrPqZbZ4qXh5x", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 
0, "x": 1175.874140249897, "y": 67.92614920483035, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 85, "height": 84.17917209201391, "seed": 1491183381, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [ { "id": "9NBdG6TyrBqjuDKvzAZXN", "type": "text" } ], "updated": 1688312605190, "link": null, "locked": false }, { "type": "text", "version": 456, "versionNonce": 1820291451, "isDeleted": false, "id": "9NBdG6TyrBqjuDKvzAZXN", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1180.874140249897, "y": 99.51573525083731, "strokeColor": "#000000", "backgroundColor": "#7950f2", "width": 75, "height": 21, "seed": 547748981, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312605190, "link": null, "locked": false, "fontSize": 16, "fontFamily": 1, "text": "Container", "textAlign": "center", "verticalAlign": "middle", "containerId": "rr3YJsAyrrPqZbZ4qXh5x", "originalText": "Container", "lineHeight": 1.3125, "baseline": 15 }, { "type": "rectangle", "version": 1864, "versionNonce": 1889532181, "isDeleted": false, "id": "ji9UBE590YJ5e8DKUKbPG", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 598.874140249897, "y": 67.92614920483035, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 85, "height": 84.17917209201391, "seed": 200844757, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [ { "id": "2fdy7dRwF7ZVaYPrb1Q7p", "type": "text" } ], "updated": 1688312605190, "link": null, "locked": false }, { "type": "text", "version": 446, "versionNonce": 673810971, "isDeleted": false, "id": "2fdy7dRwF7ZVaYPrb1Q7p", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 603.874140249897, "y": 99.51573525083731, "strokeColor": "#000000", "backgroundColor": "#7950f2", "width": 75, "height": 21, 
"seed": 1039082293, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312605190, "link": null, "locked": false, "fontSize": 16, "fontFamily": 1, "text": "Container", "textAlign": "center", "verticalAlign": "middle", "containerId": "ji9UBE590YJ5e8DKUKbPG", "originalText": "Container", "lineHeight": 1.3125, "baseline": 15 }, { "type": "rectangle", "version": 1805, "versionNonce": 281677429, "isDeleted": false, "id": "Kod3erHdgkFtJcsgqHeQQ", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 889.874140249897, "y": 67.92614920483035, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 85, "height": 84.17917209201391, "seed": 1525609621, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [ { "id": "vA5QfvPySVZE9WnP6d7dp", "type": "text" } ], "updated": 1688312605190, "link": null, "locked": false }, { "type": "text", "version": 390, "versionNonce": 1682411195, "isDeleted": false, "id": "vA5QfvPySVZE9WnP6d7dp", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 894.874140249897, "y": 99.51573525083731, "strokeColor": "#000000", "backgroundColor": "#7950f2", "width": 75, "height": 21, "seed": 1588435445, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312605190, "link": null, "locked": false, "fontSize": 16, "fontFamily": 1, "text": "Container", "textAlign": "center", "verticalAlign": "middle", "containerId": "Kod3erHdgkFtJcsgqHeQQ", "originalText": "Container", "lineHeight": 1.3125, "baseline": 15 }, { "type": "rectangle", "version": 1916, "versionNonce": 1103787989, "isDeleted": false, "id": "8p_AIsxC6UbaWESkrwl4g", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1035.874140249897, "y": 67.92614920483035, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 
85, "height": 84.17917209201391, "seed": 842044245, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [ { "id": "AceuLTPujneYjRp_njhIl", "type": "text" } ], "updated": 1688312605190, "link": null, "locked": false }, { "type": "text", "version": 499, "versionNonce": 1132199771, "isDeleted": false, "id": "AceuLTPujneYjRp_njhIl", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1041.907473074604, "y": 99.51573525083731, "strokeColor": "#000000", "backgroundColor": "#7950f2", "width": 72.93333435058594, "height": 21, "seed": 30236853, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312605190, "link": null, "locked": false, "fontSize": 16, "fontFamily": 1, "text": "Container", "textAlign": "center", "verticalAlign": "middle", "containerId": "8p_AIsxC6UbaWESkrwl4g", "originalText": "Container", "lineHeight": 1.3125, "baseline": 15 } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/embeddings.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "rectangle", "version": 824, "versionNonce": 1352316119, "isDeleted": false, "id": "U2NgEIEiFpAlwmv5Xnyzr", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 50, "angle": 0, "x": 557.5, "y": 267.30499999999995, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 995.3286713286714, "height": 339.0000000000001, "seed": 1946478225, "groupIds": [ "C_65R9XVeMQED2nMfIW2D" ], "roundness": null, "boundElements": [], "updated": 1673788613149, "link": "", "locked": false }, { "type": "text", "version": 607, "versionNonce": 590931993, "isDeleted": false, "id": "fbSO8bnZmAdvIXZxBAtHY", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", 
"roughness": 1, "opacity": 100, "angle": 0, "x": 572.7237762237763, "y": 281.50080419580416, "strokeColor": "#000", "backgroundColor": "#03a9f4", "width": 967, "height": 312, "seed": 1586673137, "groupIds": [ "C_65R9XVeMQED2nMfIW2D" ], "roundness": null, "boundElements": [], "updated": 1673788613149, "link": null, "locked": false, "fontSize": 20.27972027972028, "fontFamily": 1, "text": "Query Best Match\n----------------------------------------------------\nfeel good story Maine man wins $1M from $25 lottery ticket\nclimate change Canada's last fully intact ice shelf has suddenly collapsed, forming a \n Manhattan-sized iceberg\npublic health story US tops 5 million confirmed virus cases\nwar Beijing mobilises invasion craft along coast as Taiwan tensions escalate\nwildlife The National Park Service warns against sacrificing slower friends in a\n bear attack\nasia Beijing mobilises invasion craft along coast as Taiwan tensions escalate\nlucky Maine man wins $1M from $25 lottery ticket\ndishonest junk Make huge profits without work, earn up to $100,000 a day", "baseline": 304, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Query Best Match\n----------------------------------------------------\nfeel good story Maine man wins $1M from $25 lottery ticket\nclimate change Canada's last fully intact ice shelf has suddenly collapsed, forming a \n Manhattan-sized iceberg\npublic health story US tops 5 million confirmed virus cases\nwar Beijing mobilises invasion craft along coast as Taiwan tensions escalate\nwildlife The National Park Service warns against sacrificing slower friends in a\n bear attack\nasia Beijing mobilises invasion craft along coast as Taiwan tensions escalate\nlucky Maine man wins $1M from $25 lottery ticket\ndishonest junk Make huge profits without work, earn up to $100,000 a day" }, { "type": "rectangle", "version": 990, "versionNonce": 2000421367, "isDeleted": false, "id": "UO6MS3wSDu7yg2421__LI", "fillStyle": 
"hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 935, "y": 109, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 214, "height": 49, "seed": 1629565989, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "roundness": null, "boundElements": [ { "type": "text", "id": "8sp7H8ijWBlh6aMgZ0XTP" }, { "id": "Qzp41i_jzQIBlAB_qFKFH", "type": "arrow" }, { "id": "SJ0F0Y81z9hir5qQWAJjk", "type": "arrow" } ], "updated": 1673788613149, "link": null, "locked": false }, { "type": "rectangle", "version": 1649, "versionNonce": 1978562009, "isDeleted": false, "id": "qYd3q0Vjks7VOHUC9RR51", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 551, "y": 109.5, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 219, "height": 52, "seed": 1441952427, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "roundness": null, "boundElements": [ { "type": "text", "id": "WPeWn6N4rCHf0jY16N9Ge" }, { "id": "Qzp41i_jzQIBlAB_qFKFH", "type": "arrow" } ], "updated": 1673788613149, "link": null, "locked": false }, { "type": "text", "version": 1336, "versionNonce": 1274169655, "isDeleted": false, "id": "WPeWn6N4rCHf0jY16N9Ge", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 597.5, "y": 117.5, "strokeColor": "#000", "backgroundColor": "#fa5252", "width": 126, "height": 36, "seed": 870516459, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "roundness": null, "boundElements": [], "updated": 1673788618421, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Vectorize", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "qYd3q0Vjks7VOHUC9RR51", "originalText": "Vectorize" }, { "type": "rectangle", "version": 1236, "versionNonce": 368709975, "isDeleted": false, "id": "5VuUdI_BsJ5pyE1nTqJUI", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, 
"opacity": 100, "angle": 0, "x": 1334, "y": 110, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 218, "height": 49, "seed": 1044404613, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "roundness": null, "boundElements": [ { "id": "bJJ9SGsJsvT071qBBH0w5", "type": "text" }, { "id": "SJ0F0Y81z9hir5qQWAJjk", "type": "arrow" } ], "updated": 1673788613149, "link": null, "locked": false }, { "type": "text", "version": 1420, "versionNonce": 1511737785, "isDeleted": false, "id": "bJJ9SGsJsvT071qBBH0w5", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1397, "y": 116.5, "strokeColor": "#000", "backgroundColor": "#fa5252", "width": 92, "height": 36, "seed": 128953675, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "roundness": null, "boundElements": [], "updated": 1673788618421, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Search", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "5VuUdI_BsJ5pyE1nTqJUI", "originalText": "Search" }, { "type": "text", "version": 981, "versionNonce": 1600260695, "isDeleted": false, "id": "8sp7H8ijWBlh6aMgZ0XTP", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1004, "y": 115.5, "strokeColor": "#000", "backgroundColor": "transparent", "width": 76, "height": 36, "seed": 1854823263, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "roundness": null, "boundElements": [], "updated": 1673788618422, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Index", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "UO6MS3wSDu7yg2421__LI", "originalText": "Index" }, { "type": "text", "version": 420, "versionNonce": 1275651991, "isDeleted": false, "id": "jWJpSXHkTCzRTCA4tbAgv", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 550.5, "y": 189.30499999999995, 
"strokeColor": "#000", "backgroundColor": "#03a9f4", "width": 296, "height": 42, "seed": 1241563487, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "roundness": null, "boundElements": [], "updated": 1673788613150, "link": null, "locked": false, "fontSize": 16, "fontFamily": 1, "text": "- Transform input into numbers\n- Similar concepts have similar values", "baseline": 36, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "- Transform input into numbers\n- Similar concepts have similar values" }, { "type": "text", "version": 419, "versionNonce": 51134809, "isDeleted": false, "id": "qEnmXs0P_MQE8r4c4OWGh", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 933.5, "y": 188.20749999999992, "strokeColor": "#000", "backgroundColor": "#f44336", "width": 230, "height": 42, "seed": 1038536465, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "roundness": null, "boundElements": [], "updated": 1673788613150, "link": null, "locked": false, "fontSize": 16, "fontFamily": 1, "text": "- Save vectors\n- Store content with vectors", "baseline": 36, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "- Save vectors\n- Store content with vectors" }, { "type": "text", "version": 529, "versionNonce": 869653687, "isDeleted": false, "id": "1q8bzjK8lnKUZj8_A9v7D", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1245, "y": 191.20749999999992, "strokeColor": "#000", "backgroundColor": "#f44336", "width": 333, "height": 42, "seed": 304472945, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "roundness": null, "boundElements": [], "updated": 1673788613150, "link": null, "locked": false, "fontSize": 16, "fontFamily": 1, "text": "- Find similar vectors with cosine similarity\n- Add rule-based filters using content", "baseline": 36, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "- Find 
similar vectors with cosine similarity\n- Add rule-based filters using content" }, { "type": "arrow", "version": 1935, "versionNonce": 20011767, "isDeleted": false, "id": "Qzp41i_jzQIBlAB_qFKFH", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 772.5, "y": 131.8470411964629, "strokeColor": "#000", "backgroundColor": "#f44336", "width": 158.1310513485223, "height": 0.5692601572380909, "seed": 660786897, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "roundness": { "type": 2 }, "boundElements": [], "updated": 1673788613309, "link": null, "locked": false, "startBinding": { "elementId": "qYd3q0Vjks7VOHUC9RR51", "focus": -0.15367587596362536, "gap": 2.5 }, "endBinding": { "elementId": "UO6MS3wSDu7yg2421__LI", "focus": 0.027437144815141, "gap": 4.3689486514776945 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 158.1310513485223, 0.5692601572380909 ] ] }, { "type": "arrow", "version": 2084, "versionNonce": 1180230679, "isDeleted": false, "id": "SJ0F0Y81z9hir5qQWAJjk", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1151.5, "y": 134.67907617019108, "strokeColor": "#000", "backgroundColor": "#f44336", "width": 181.5, "height": 1.5898915058209013, "seed": 899541905, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "roundness": { "type": 2 }, "boundElements": [], "updated": 1673788613309, "link": null, "locked": false, "startBinding": { "elementId": "UO6MS3wSDu7yg2421__LI", "focus": 0.08406032225724415, "gap": 2.5 }, "endBinding": { "elementId": "5VuUdI_BsJ5pyE1nTqJUI", "focus": 0.09327847520504394, "gap": 1 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 181.5, -1.5898915058209013 ] ] } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: 
docs/images/examples.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "rectangle", "version": 658, "versionNonce": 56883131, "isDeleted": false, "id": "28knmXv7UWxhQgfO1Ap_e", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 843.2544325440515, "y": 565.0365494150617, "strokeColor": "#495057", "backgroundColor": "transparent", "width": 131.4290956134916, "height": 19.163298353951955, "seed": 333697800, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176184, "link": null, "locked": false }, { "type": "rectangle", "version": 759, "versionNonce": 1321514357, "isDeleted": false, "id": "NWB9SsDdfRnKFM7WSMiDD", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 843.094785904982, "y": 596.6192561492705, "strokeColor": "#495057", "backgroundColor": "transparent", "width": 236.33086957943124, "height": 8.353548393013705, "seed": 1799143800, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176184, "link": null, "locked": false }, { "type": "rectangle", "version": 873, "versionNonce": 1965246555, "isDeleted": false, "id": "o8mRXU1Pv0Lht3Kvw3LjM", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 843.9010630546302, "y": 628.3064112463869, "strokeColor": "#495057", "backgroundColor": "transparent", "width": 172.46335772731075, "height": 10.760364257767161, "seed": 84161032, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176184, "link": null, "locked": false }, { "type": "rectangle", "version": 852, "versionNonce": 1473789653, "isDeleted": false, "id": "7XXBg9rngZPBd8BY3k-Dk", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 
1, "opacity": 50, "angle": 0, "x": 820.9965701219508, "y": 409.40769144764795, "strokeColor": "#495057", "backgroundColor": "transparent", "width": 772.0068597560986, "height": 326.7582126998559, "seed": 1645597304, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false }, { "type": "rectangle", "version": 820, "versionNonce": 1609532667, "isDeleted": false, "id": "1djoflJbPq1iZYxeG6vvs", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 843.094785904982, "y": 612.826386710884, "strokeColor": "#495057", "backgroundColor": "transparent", "width": 236.33086957943124, "height": 8.353548393013705, "seed": 1354805512, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false }, { "type": "rectangle", "version": 1047, "versionNonce": 2087951413, "isDeleted": false, "id": "61SvR173kGBlNiM2hYx-i", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 843.933979602177, "y": 666.8116705347015, "strokeColor": "#495057", "backgroundColor": "transparent", "width": 141.1603070419808, "height": 35.19095464139164, "seed": 497337208, "groupIds": [], "frameId": null, "roundness": { "type": 1 }, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false }, { "type": "text", "version": 1179, "versionNonce": 1800088987, "isDeleted": false, "id": "KweZDYL4pV266i29UQo2O", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 873.1942926679951, "y": 677.4126832647471, "strokeColor": "#495057", "backgroundColor": "transparent", "width": 63, "height": 17, "seed": 244666376, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false, "fontSize": 12.613804582094819, "fontFamily": 
1, "text": "Show more", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Show more", "lineHeight": 1.3477297741025214, "baseline": 12 }, { "type": "line", "version": 1055, "versionNonce": 612126101, "isDeleted": false, "id": "6UHr1F9sbQjfc3Pn3AUC9", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 4.71238898038469, "x": 955.8234234136398, "y": 681.4576006557272, "strokeColor": "#495057", "backgroundColor": "transparent", "width": 14.84733876817561, "height": 7.91335492900009, "seed": 1278055544, "groupIds": [], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 7.737815540443359, 7.91335492900009 ], [ 14.84733876817561, 0.6726390980766681 ] ] }, { "type": "rectangle", "version": 1593, "versionNonce": 711508539, "isDeleted": false, "id": "pQ7Helj-RkhLonwDn4uxq", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 878.2965111534407, "y": 493.103306996402, "strokeColor": "#000000", "backgroundColor": "#ff7043", "width": 12.482770172023958, "height": 31.20692543005986, "seed": 1223270520, "groupIds": [ "0ClD8yewsWOTVzkWq2mkc" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false }, { "type": "line", "version": 806, "versionNonce": 522703605, "isDeleted": false, "id": "bI-A-gEMD-yvyhVvuajAH", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 865.549721104171, "y": 442.4040920739574, "strokeColor": "#000000", "backgroundColor": "#12b886", "width": 0, "height": 87.3793912041677, "seed": 1280885512, "groupIds": [ "0ClD8yewsWOTVzkWq2mkc" ], "frameId": null, "roundness": { "type": 2 }, 
"boundElements": [], "updated": 1688314176185, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 0, 87.3793912041677 ] ] }, { "type": "line", "version": 816, "versionNonce": 640336603, "isDeleted": false, "id": "X3L53b-aDRBM9J_i7xfIb", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 865.3142359887239, "y": 530.6645290499368, "strokeColor": "#000000", "backgroundColor": "#12b886", "width": 181.0001674943474, "height": 0, "seed": 1422840184, "groupIds": [ "0ClD8yewsWOTVzkWq2mkc" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 181.0001674943474, 0 ] ] }, { "type": "rectangle", "version": 1768, "versionNonce": 956017749, "isDeleted": false, "id": "Fk7rL2dsob9lBYBWtqIaE", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 909.5034365835013, "y": 461.80381850437016, "strokeColor": "#000000", "backgroundColor": "#ffeb3b", "width": 12.48277017202394, "height": 62.41385086011974, "seed": 1096491528, "groupIds": [ "0ClD8yewsWOTVzkWq2mkc" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false }, { "type": "rectangle", "version": 1638, "versionNonce": 656650107, "isDeleted": false, "id": "V3UV1tCb1upEwCUx2NF_U", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 940.7103620135604, "y": 480.5279737624063, "strokeColor": "#000000", "backgroundColor": "#03a9f4", "width": 12.482770172023953, "height": 43.68969560208382, "seed": 1860559480, "groupIds": [ "0ClD8yewsWOTVzkWq2mkc" ], "frameId": null, 
"roundness": null, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false }, { "type": "rectangle", "version": 1650, "versionNonce": 793292539, "isDeleted": false, "id": "H9TFct1zEuli3oFZ0J6Pi", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 971.9172874436208, "y": 474.28658867639376, "strokeColor": "#000000", "backgroundColor": "#7950f2", "width": 12.482770172023953, "height": 49.9310806880958, "seed": 1153016072, "groupIds": [ "0ClD8yewsWOTVzkWq2mkc" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314179109, "link": null, "locked": false }, { "type": "rectangle", "version": 1806, "versionNonce": 1066797083, "isDeleted": false, "id": "Yvl5ZFMXGqZoKBjooHuuL", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1003.1242128736812, "y": 468.04520359038156, "strokeColor": "#000000", "backgroundColor": "#00e676", "width": 12.48277017202394, "height": 56.172465774107785, "seed": 905106296, "groupIds": [ "0ClD8yewsWOTVzkWq2mkc" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false }, { "type": "text", "version": 820, "versionNonce": 851598101, "isDeleted": false, "id": "k2RjGkR4842FWeQqwZNqP", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1140.5405571864028, "y": 441.0898168120742, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 134, "height": 46, "seed": 351902216, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false, "fontSize": 36, "fontFamily": 1, "text": "Results", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Results", "lineHeight": 1.2777777777777777, "baseline": 32 }, { "type": "line", "version": 387, "versionNonce": 
359943355, "isDeleted": false, "id": "9BG_CdPIU12PIVg57JFjb", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1142.5011920133527, "y": 524.9312390735229, "strokeColor": "#000000", "backgroundColor": "#12b886", "width": 387.9702360583586, "height": 0.8663462515423723, "seed": 656392968, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 387.9702360583586, -0.8663462515423723 ] ] }, { "type": "text", "version": 959, "versionNonce": 1682976885, "isDeleted": false, "id": "ooEpA700FaTXoHICTApB5", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1147.668463887201, "y": 537.9228771043508, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 10.29019548584314, "height": 17.493332325933352, "seed": 771215736, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false, "fontSize": 12.903833622972927, "fontFamily": 1, "text": "0", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "0", "lineHeight": 1.3556693953949979, "baseline": 12 }, { "type": "text", "version": 1031, "versionNonce": 1818295643, "isDeleted": false, "id": "n9FAYaIZeMXggjqKvdYxs", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1147.927904475898, "y": 577.8396648224748, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 4.116078194337256, "height": 17.493332325933352, "seed": 725754376, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 
1688314176185, "link": null, "locked": false, "fontSize": 12.903833622972932, "fontFamily": 1, "text": "1", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "1", "lineHeight": 1.3556693953949972, "baseline": 12 }, { "type": "text", "version": 1107, "versionNonce": 1144805845, "isDeleted": false, "id": "fQfYp0ljtX1ESRr5wPM9b", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1147.927904475895, "y": 625.2941282157817, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 10.29019548584314, "height": 17.493332325933352, "seed": 988734072, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false, "fontSize": 12.903833622972925, "fontFamily": 1, "text": "2", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "2", "lineHeight": 1.3556693953949979, "baseline": 12 }, { "type": "text", "version": 1015, "versionNonce": 1873713659, "isDeleted": false, "id": "6-kghWfO7GdeQUDWs_zBe", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1146.639444338615, "y": 501.6706877015755, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 18.522351874517668, "height": 17.493332325933352, "seed": 1505796360, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false, "fontSize": 12.903833622972927, "fontFamily": 1, "text": "ID", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "ID", "lineHeight": 1.3556693953949979, "baseline": 12 }, { "type": "text", "version": 1024, "versionNonce": 463230773, "isDeleted": false, "id": "UMowu2HWCm_mgorPKfHhX", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, 
"angle": 0, "x": 1246.2049260479205, "y": 537.9228771043508, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 54.53803607496866, "height": 17.493332325933352, "seed": 417059704, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false, "fontSize": 12.903833622972925, "fontFamily": 1, "text": "_____", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "_____", "lineHeight": 1.3556693953949979, "baseline": 12 }, { "type": "text", "version": 1093, "versionNonce": 1012900507, "isDeleted": false, "id": "1_YJ1fjNPFhQdtKIp-DSI", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1246.464366636618, "y": 577.8396648224748, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 54.53803607496866, "height": 17.493332325933352, "seed": 1377255432, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176185, "link": null, "locked": false, "fontSize": 12.90383362297293, "fontFamily": 1, "text": "_____", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "_____", "lineHeight": 1.3556693953949974, "baseline": 12 }, { "type": "text", "version": 1168, "versionNonce": 1960159381, "isDeleted": false, "id": "O5wbweMweVsQWwdNABtm7", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1246.4643666366173, "y": 625.2941282157817, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 43.2188210405412, "height": 17.493332325933352, "seed": 1845666936, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176186, "link": null, "locked": false, "fontSize": 12.903833622972925, "fontFamily": 1, "text": "____", "textAlign": "left", 
"verticalAlign": "top", "containerId": null, "originalText": "____", "lineHeight": 1.3556693953949979, "baseline": 12 }, { "type": "text", "version": 1068, "versionNonce": 723949371, "isDeleted": false, "id": "v3I-eUd4WMJ8VzvC_7rB_", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1246.204926047921, "y": 501.6706877015755, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 32.92862555469805, "height": 17.493332325933352, "seed": 1682046728, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176186, "link": null, "locked": false, "fontSize": 12.903833622972927, "fontFamily": 1, "text": "Text", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Text", "lineHeight": 1.3556693953949979, "baseline": 12 }, { "type": "text", "version": 1119, "versionNonce": 1906167285, "isDeleted": false, "id": "sdGrFMITxSJsejpV0zIFY", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1354.7619764110675, "y": 500.94934123472103, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 34.986664651866704, "height": 17.493332325933352, "seed": 161608312, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176186, "link": null, "locked": false, "fontSize": 12.903833622972929, "fontFamily": 1, "text": "Score", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Score", "lineHeight": 1.3556693953949976, "baseline": 12 }, { "type": "text", "version": 1042, "versionNonce": 1516632027, "isDeleted": false, "id": "5tPTHdBAi9oW2Cme01uST", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1356.7782004146586, "y": 539.0230645323406, "strokeColor": "#000000", "backgroundColor": 
"transparent", "width": 34.986664651866704, "height": 17.493332325933352, "seed": 1702057736, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176186, "link": null, "locked": false, "fontSize": 12.903833622972929, "fontFamily": 1, "text": "0.851", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "0.851", "lineHeight": 1.3556693953949976, "baseline": 12 }, { "type": "text", "version": 1117, "versionNonce": 1627014997, "isDeleted": false, "id": "VkvcQQqVWAFSBPXeb-631", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1355.2070363053651, "y": 576.0677682813757, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 40.131762394788254, "height": 17.493332325933352, "seed": 1116261384, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176186, "link": null, "locked": false, "fontSize": 12.903833622972929, "fontFamily": 1, "text": "0.834", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "0.834", "lineHeight": 1.3556693953949976, "baseline": 12 }, { "type": "text", "version": 1189, "versionNonce": 2016429179, "isDeleted": false, "id": "r5V4P6WY1YTnZqynbGcPW", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1354.5812615574996, "y": 623.4026675162545, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 38.07372329761963, "height": 17.493332325933352, "seed": 754228488, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176186, "link": null, "locked": false, "fontSize": 12.903833622972924, "fontFamily": 1, "text": "0.653", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "0.653", "lineHeight": 1.355669395394998, 
"baseline": 12 }, { "type": "text", "version": 1164, "versionNonce": 356553909, "isDeleted": false, "id": "tplkVB19dZGWPH04eoN3-", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1452.2813974443563, "y": 501.2965906346964, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 36.01568420045099, "height": 17.493332325933352, "seed": 633774600, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176187, "link": null, "locked": false, "fontSize": 12.903833622972922, "fontFamily": 1, "text": "Data", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Data", "lineHeight": 1.3556693953949983, "baseline": 12 }, { "type": "text", "version": 1091, "versionNonce": 1279786267, "isDeleted": false, "id": "-O_818hE0e37gLO8X85ED", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1456.3974756386945, "y": 536.2832552865634, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 57.6250947207216, "height": 17.493332325933352, "seed": 1316501624, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176187, "link": null, "locked": false, "fontSize": 12.903833622972925, "fontFamily": 1, "text": "{____}", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "{____}", "lineHeight": 1.3556693953949979, "baseline": 12 }, { "type": "text", "version": 1118, "versionNonce": 2048275989, "isDeleted": false, "id": "q_5ogcScUemL3g6XJyETb", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1458.4555147358635, "y": 569.2118808412615, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 57.6250947207216, "height": 17.493332325933352, "seed": 649694840, "groupIds": [ 
"RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176187, "link": null, "locked": false, "fontSize": 12.903833622972925, "fontFamily": 1, "text": "{____}", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "{____}", "lineHeight": 1.3556693953949979, "baseline": 12 }, { "type": "text", "version": 1111, "versionNonce": 35627451, "isDeleted": false, "id": "hePUy_nbNvIpf6_6PBvbq", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1458.4555147358635, "y": 618.6048191733089, "strokeColor": "#000000", "backgroundColor": "transparent", "width": 57.6250947207216, "height": 17.493332325933352, "seed": 1966475272, "groupIds": [ "RaIXn6GkWfnJt6NeXQ4tC" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688314176187, "link": null, "locked": false, "fontSize": 12.903833622972925, "fontFamily": 1, "text": "{____}", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "{____}", "lineHeight": 1.3556693953949979, "baseline": 12 } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/faq.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "ellipse", "version": 236, "versionNonce": 266746677, "isDeleted": false, "id": "pLSV--mc1HnQu2oyARr4o", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 593.4166948358098, "y": -409.48426474916266, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 100, "height": 92, "seed": 513066133, "groupIds": [ "agHSoiBea3xOkqCmOPa91", "dKeNjYUtPsZHi90BFF7EY" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688313196175, "link": null, "locked": false }, { 
"type": "text", "version": 281, "versionNonce": 1964375707, "isDeleted": false, "id": "n2qByon4fJyq2-FZxQqM0", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 629.5250284234563, "y": -397.41226924952065, "strokeColor": "#ff7043", "backgroundColor": "#2e303e", "width": 29.78333282470704, "height": 79.856009000716, "seed": 852571451, "groupIds": [ "agHSoiBea3xOkqCmOPa91", "dKeNjYUtPsZHi90BFF7EY" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688313196175, "link": null, "locked": false, "fontSize": 63.88480720057278, "fontFamily": 1, "text": "?", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "?", "lineHeight": 1.25, "baseline": 56 }, { "type": "ellipse", "version": 288, "versionNonce": 239275157, "isDeleted": false, "id": "QIRNCrcWupjspegQn6VDV", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 744.4166948358098, "y": -409.48426474916266, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 100, "height": 92, "seed": 1498649243, "groupIds": [ "yonc0SR8jSwQLwuVvb9hG", "dKeNjYUtPsZHi90BFF7EY" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688313196175, "link": null, "locked": false }, { "type": "text", "version": 372, "versionNonce": 1982180597, "isDeleted": false, "id": "a-CPMQS9ApW4f7p9FjS8m", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 780.5250284234563, "y": -397.41226924952065, "strokeColor": "#ffd43b", "backgroundColor": "#ffeb3b", "width": 29.78333282470704, "height": 79.856009000716, "seed": 1679591227, "groupIds": [ "yonc0SR8jSwQLwuVvb9hG", "dKeNjYUtPsZHi90BFF7EY" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688313209123, "link": null, "locked": false, "fontSize": 63.88480720057278, "fontFamily": 1, "text": "?", "textAlign": 
"left", "verticalAlign": "top", "containerId": null, "originalText": "?", "lineHeight": 1.25, "baseline": 56 }, { "type": "ellipse", "version": 326, "versionNonce": 1069462005, "isDeleted": false, "id": "nsYgl7js3PjMKKTKcLYf1", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 895.4166948358098, "y": -409.48426474916266, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 100, "height": 92, "seed": 384682581, "groupIds": [ "M7MHx-aSMrw_q1mhaAuG6", "dKeNjYUtPsZHi90BFF7EY" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688313196175, "link": null, "locked": false }, { "type": "text", "version": 371, "versionNonce": 641029083, "isDeleted": false, "id": "a_CBcLegs1ZMstjO9f4bx", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 931.5250284234563, "y": -397.41226924952065, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 29.78333282470704, "height": 79.856009000716, "seed": 1741404085, "groupIds": [ "M7MHx-aSMrw_q1mhaAuG6", "dKeNjYUtPsZHi90BFF7EY" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688313196175, "link": null, "locked": false, "fontSize": 63.88480720057278, "fontFamily": 1, "text": "?", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "?", "lineHeight": 1.25, "baseline": 56 }, { "type": "ellipse", "version": 328, "versionNonce": 22981461, "isDeleted": false, "id": "l88Tj2DPIgTUA2Pij6hPA", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1046.4166948358097, "y": -409.48426474916266, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 100, "height": 92, "seed": 701954677, "groupIds": [ "vFCiKG5iTbg2K5S2WQSsn", "dKeNjYUtPsZHi90BFF7EY" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688313196175, "link": 
null, "locked": false }, { "type": "text", "version": 373, "versionNonce": 1236541563, "isDeleted": false, "id": "oqmBYU6SpmJhOl9d1JLJy", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1082.5250284234562, "y": -397.41226924952065, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 29.78333282470704, "height": 79.856009000716, "seed": 39445461, "groupIds": [ "vFCiKG5iTbg2K5S2WQSsn", "dKeNjYUtPsZHi90BFF7EY" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688313196175, "link": null, "locked": false, "fontSize": 63.88480720057278, "fontFamily": 1, "text": "?", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "?", "lineHeight": 1.25, "baseline": 56 }, { "type": "ellipse", "version": 329, "versionNonce": 1937275061, "isDeleted": false, "id": "IFZTHrjZaEk7Avy2zkma-", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1197.4166948358097, "y": -409.48426474916266, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 100, "height": 92, "seed": 894921429, "groupIds": [ "70qQx7edpvpMfsvEta9mo", "dKeNjYUtPsZHi90BFF7EY" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688313196175, "link": null, "locked": false }, { "type": "text", "version": 374, "versionNonce": 2029017371, "isDeleted": false, "id": "p0xoCeTYO1ZVy-xm2oET3", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1233.5250284234562, "y": -397.41226924952065, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 29.78333282470704, "height": 79.856009000716, "seed": 959268917, "groupIds": [ "70qQx7edpvpMfsvEta9mo", "dKeNjYUtPsZHi90BFF7EY" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688313196175, "link": null, "locked": false, "fontSize": 63.88480720057278, "fontFamily": 1, 
"text": "?", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "?", "lineHeight": 1.25, "baseline": 56 } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/flows.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "text", "version": 603, "versionNonce": 1883892902, "isDeleted": false, "id": "Buic2Lx427wuSIW8P_Rw5", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 714, "y": 141, "strokeColor": "#000000", "backgroundColor": "#228be6", "width": 532, "height": 46, "seed": 373648901, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1673791247706, "link": null, "locked": false, "fontSize": 36, "fontFamily": 1, "text": "What are semantic workflows?", "baseline": 32, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "What are semantic workflows?" 
}, { "type": "rectangle", "version": 1671, "versionNonce": 51714234, "isDeleted": false, "id": "qYd3q0Vjks7VOHUC9RR51", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 425, "y": 339.5, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 290.0000000000001, "height": 46, "seed": 1441952427, "groupIds": [], "roundness": null, "boundElements": [ { "type": "text", "id": "WPeWn6N4rCHf0jY16N9Ge" } ], "updated": 1673791247706, "link": null, "locked": false }, { "type": "arrow", "version": 323, "versionNonce": 921790438, "isDeleted": false, "id": "tgnQzXC9s8RY4oImBOXuB", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 562.5285999651802, "y": 272.4578674923631, "strokeColor": "#000", "backgroundColor": "#228be6", "width": 0.4714000348197942, "height": 72.5421325076369, "seed": 650463755, "groupIds": [], "roundness": { "type": 2 }, "boundElements": [], "updated": 1673791247706, "link": null, "locked": false, "startBinding": { "elementId": "B435ajoI5vAQBDvzkd8aY", "focus": 0.05963733900881362, "gap": 5.457867492363107 }, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 0.4714000348197942, 72.5421325076369 ] ] }, { "type": "text", "version": 1359, "versionNonce": 2032529786, "isDeleted": false, "id": "WPeWn6N4rCHf0jY16N9Ge", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 430, "y": 344.5, "strokeColor": "#000000", "backgroundColor": "#fa5252", "width": 280, "height": 36, "seed": 870516459, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1673791247706, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Translate", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "qYd3q0Vjks7VOHUC9RR51", "originalText": "Translate" }, { "type": 
"rectangle", "version": 180, "versionNonce": 159873830, "isDeleted": false, "id": "IxMxusRKX2PpnJT1uY0cC", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 819, "y": 217.5, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 290.0000000000001, "height": 49, "seed": 1364021803, "groupIds": [], "roundness": null, "boundElements": [ { "type": "text", "id": "SSXjX_URKvcVC8h-Qgz4j" }, { "id": "_YNRYTAnVzdhhKxFtkVyg", "type": "arrow" } ], "updated": 1673791247706, "link": null, "locked": false }, { "type": "text", "version": 85, "versionNonce": 1360900666, "isDeleted": false, "id": "SSXjX_URKvcVC8h-Qgz4j", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 824, "y": 224, "strokeColor": "#000000", "backgroundColor": "#82c91e", "width": 280, "height": 36, "seed": 948915563, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1673791247706, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Extract Text", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "IxMxusRKX2PpnJT1uY0cC", "originalText": "Extract Text" }, { "type": "rectangle", "version": 217, "versionNonce": 511256166, "isDeleted": false, "id": "lATIKISgJPOGnUHleuFRH", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 821, "y": 338, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 290.0000000000001, "height": 46, "seed": 1256311595, "groupIds": [], "roundness": null, "boundElements": [ { "id": "21WiUuyDtpQ9FnJ74REpJ", "type": "text" }, { "id": "Q51e0Hav-kYfjX3b8tt-r", "type": "arrow" } ], "updated": 1673791247706, "link": null, "locked": false }, { "type": "text", "version": 96, "versionNonce": 2111789818, "isDeleted": false, "id": "21WiUuyDtpQ9FnJ74REpJ", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", 
"roughness": 1, "opacity": 100, "angle": 0, "x": 826, "y": 343, "strokeColor": "#000000", "backgroundColor": "#fa5252", "width": 280, "height": 36, "seed": 626266373, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1673791247706, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Summarize", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "lATIKISgJPOGnUHleuFRH", "originalText": "Summarize" }, { "type": "arrow", "version": 162, "versionNonce": 1452501414, "isDeleted": false, "id": "_YNRYTAnVzdhhKxFtkVyg", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 962.2340210300499, "y": 270, "strokeColor": "#000", "backgroundColor": "#228be6", "width": 0.2659789699500834, "height": 75, "seed": 101249451, "groupIds": [], "roundness": { "type": 2 }, "boundElements": [], "updated": 1673791247706, "link": null, "locked": false, "startBinding": { "elementId": "IxMxusRKX2PpnJT1uY0cC", "focus": 0.012856281024868153, "gap": 3.5 }, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 0.2659789699500834, 75 ] ] }, { "type": "rectangle", "version": 225, "versionNonce": 502131642, "isDeleted": false, "id": "6B3J0wJfi561c9gMsgtXG", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 824, "y": 455, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 290.0000000000001, "height": 46, "seed": 1164190923, "groupIds": [], "roundness": null, "boundElements": [ { "id": "qJDqmDEZF9Ewh2UrjXq5Z", "type": "text" }, { "id": "Q51e0Hav-kYfjX3b8tt-r", "type": "arrow" } ], "updated": 1673791247706, "link": null, "locked": false }, { "type": "text", "version": 147, "versionNonce": 1896039654, "isDeleted": false, "id": "qJDqmDEZF9Ewh2UrjXq5Z", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, 
"opacity": 100, "angle": 0, "x": 829, "y": 460, "strokeColor": "#000000", "backgroundColor": "#fa5252", "width": 280, "height": 36, "seed": 1307943269, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1673791247706, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Build Vector Index", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "6B3J0wJfi561c9gMsgtXG", "originalText": "Build Vector Index" }, { "type": "arrow", "version": 215, "versionNonce": 124544122, "isDeleted": false, "id": "Q51e0Hav-kYfjX3b8tt-r", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 963.600775377322, "y": 387.5, "strokeColor": "#000", "backgroundColor": "#228be6", "width": 0.29392358842710564, "height": 66.5, "seed": 59173285, "groupIds": [], "roundness": { "type": 2 }, "boundElements": [], "updated": 1673791247706, "link": null, "locked": false, "startBinding": { "elementId": "lATIKISgJPOGnUHleuFRH", "focus": 0.015253485349599789, "gap": 3.5 }, "endBinding": { "elementId": "6B3J0wJfi561c9gMsgtXG", "focus": -0.039966641220930875, "gap": 1 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ -0.29392358842710564, 66.5 ] ] }, { "type": "rectangle", "version": 1258, "versionNonce": 1062582310, "isDeleted": false, "id": "5VuUdI_BsJ5pyE1nTqJUI", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1195, "y": 221, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 290.0000000000001, "height": 46, "seed": 1044404613, "groupIds": [], "roundness": null, "boundElements": [ { "id": "bJJ9SGsJsvT071qBBH0w5", "type": "text" }, { "id": "Q51e0Hav-kYfjX3b8tt-r", "type": "arrow" } ], "updated": 1673791247706, "link": null, "locked": false }, { "type": "text", "version": 1443, "versionNonce": 1411237178, "isDeleted": false, "id": 
"bJJ9SGsJsvT071qBBH0w5", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1200, "y": 226, "strokeColor": "#000000", "backgroundColor": "#fa5252", "width": 280, "height": 36, "seed": 128953675, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1673791247706, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Run similarity query", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "5VuUdI_BsJ5pyE1nTqJUI", "originalText": "Run similarity query" }, { "type": "rectangle", "version": 359, "versionNonce": 1029930726, "isDeleted": false, "id": "pSbgtf1qAB7tWl-pTld7e", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1197, "y": 343, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 290.0000000000001, "height": 46, "seed": 210689733, "groupIds": [], "roundness": null, "boundElements": [ { "id": "pQzKSM3audka1kiQm_Sku", "type": "text" }, { "id": "Q51e0Hav-kYfjX3b8tt-r", "type": "arrow" }, { "id": "k67YzXNtt1GLh4i8Es5zJ", "type": "arrow" } ], "updated": 1673791251799, "link": null, "locked": false }, { "type": "text", "version": 320, "versionNonce": 660337658, "isDeleted": false, "id": "pQzKSM3audka1kiQm_Sku", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1219.5, "y": 348, "strokeColor": "#000", "backgroundColor": "#fa5252", "width": 245, "height": 36, "seed": 1869028363, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1673791256722, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Send notifications", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "pSbgtf1qAB7tWl-pTld7e", "originalText": "Send notifications" }, { "type": "arrow", "version": 329, "versionNonce": 251216122, "isDeleted": false, "id": 
"k67YzXNtt1GLh4i8Es5zJ", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1339.1171359117898, "y": 265.5, "strokeColor": "#000", "backgroundColor": "#228be6", "width": 1.0119492860442278, "height": 76.5, "seed": 1656816747, "groupIds": [], "roundness": { "type": 2 }, "boundElements": [], "updated": 1673791249246, "link": null, "locked": false, "startBinding": null, "endBinding": { "elementId": "pSbgtf1qAB7tWl-pTld7e", "focus": -0.010690950588675632, "gap": 1 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 1.0119492860442278, 76.5 ] ] }, { "type": "text", "version": 320, "versionNonce": 407191226, "isDeleted": false, "id": "kKPPLMCj8QQIJLbW-B5hm", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1180, "y": 451, "strokeColor": "#000", "backgroundColor": "#fab005", "width": 296, "height": 52, "seed": 1079990731, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1673791247706, "link": null, "locked": false, "fontSize": 20, "fontFamily": 1, "text": "- API bindings for JavaScript,\n Rust, Go and Java", "baseline": 44, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "- API bindings for JavaScript,\n Rust, Go and Java" }, { "type": "text", "version": 572, "versionNonce": 1757323750, "isDeleted": false, "id": "1AQ3rj-V4weRtPA8a5-z-", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 437.5, "y": 449.5, "strokeColor": "#000", "backgroundColor": "#fab005", "width": 278, "height": 52, "seed": 836512075, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1673791247706, "link": null, "locked": false, "fontSize": 20, "fontFamily": 1, "text": "- Build with Python or YAML\n- Run local or via API", "baseline": 44, "textAlign": "left", "verticalAlign": "top", 
"containerId": null, "originalText": "- Build with Python or YAML\n- Run local or via API" }, { "type": "rectangle", "version": 304, "versionNonce": 23156602, "isDeleted": false, "id": "B435ajoI5vAQBDvzkd8aY", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 426, "y": 221, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 290.0000000000001, "height": 46, "seed": 942981672, "groupIds": [], "roundness": null, "boundElements": [ { "id": "AoGnxEHn4x-zq2-0C0VrT", "type": "text" }, { "id": "Q51e0Hav-kYfjX3b8tt-r", "type": "arrow" }, { "id": "tgnQzXC9s8RY4oImBOXuB", "type": "arrow" } ], "updated": 1673791247706, "link": null, "locked": false }, { "type": "text", "version": 183, "versionNonce": 1100838182, "isDeleted": false, "id": "AoGnxEHn4x-zq2-0C0VrT", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 431, "y": 226, "strokeColor": "#000000", "backgroundColor": "#fa5252", "width": 280, "height": 36, "seed": 604886104, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1673791247706, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Summarize", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "B435ajoI5vAQBDvzkd8aY", "originalText": "Summarize" } ], "appState": { "gridSize": null, "viewBackgroundColor": "#3030" }, "files": {} } ================================================ FILE: docs/images/format.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "rectangle", "version": 564, "versionNonce": 555415043, "index": "aZV", "isDeleted": false, "id": "c8q6be5K81xmCJ-ukSRnZ", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 518, "y": -130.82625000000002, "strokeColor": "#03a9f4", 
"backgroundColor": "#03a9f4", "width": 261, "height": 183, "seed": 443859885, "groupIds": [ "2ZFdM1ONmx5lh5aHlDXqH" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1724584360105, "link": null, "locked": false }, { "type": "text", "version": 518, "versionNonce": 1253679021, "index": "aa", "isDeleted": false, "id": "uKvu2zcnLI9S3D-aFErJQ", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 616.6083333333333, "y": -129.82625000000002, "strokeColor": "#1e1e1e", "backgroundColor": "#03a9f4", "width": 53.78333333333333, "height": 35, "seed": 848196621, "groupIds": [ "2ZFdM1ONmx5lh5aHlDXqH" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1724584360105, "link": null, "locked": false, "fontSize": 28, "fontFamily": 5, "text": "ANN", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "ANN", "autoResize": true, "lineHeight": 1.25 }, { "type": "text", "version": 480, "versionNonce": 944506682, "index": "ab", "isDeleted": false, "id": "DCqImwTRCBh0nmPIv-3zR", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 536.8249969482422, "y": -85.17374999999998, "strokeColor": "#1e1e1e", "backgroundColor": "#03a9f4", "width": 213.35000610351562, "height": 125, "seed": 1618350307, "groupIds": [ "2ZFdM1ONmx5lh5aHlDXqH" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1724587151711, "link": null, "locked": false, "fontSize": 20, "fontFamily": 5, "text": "- Faiss\n- HNSWLib\n- Annoy\n- NumPy\n- Postgres (pgvector)", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "- Faiss\n- HNSWLib\n- Annoy\n- NumPy\n- Postgres (pgvector)", "autoResize": true, "lineHeight": 1.25 }, { "type": "rectangle", "version": 780, "versionNonce": 1085123203, "index": "abV", "isDeleted": false, "id": "Jo5LO2bSebtz54pWb3XHT", "fillStyle": "hachure", "strokeWidth": 
1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 810.5, "y": -131.82625000000002, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 282.9999999999999, "height": 182, "seed": 848620899, "groupIds": [ "hCyHVfABhzmE_8xEhUbGD" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1724584372798, "link": null, "locked": false }, { "type": "text", "version": 717, "versionNonce": 708473645, "index": "ac", "isDeleted": false, "id": "wX4udz8nceRrNC3uUDLYf", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 886.2583312988281, "y": -121.82625000000002, "strokeColor": "#1e1e1e", "backgroundColor": "#03a9f4", "width": 131.48333740234375, "height": 35, "seed": 16206083, "groupIds": [ "hCyHVfABhzmE_8xEhUbGD" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1724584372798, "link": null, "locked": false, "fontSize": 28, "fontFamily": 5, "text": "Database", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Database", "autoResize": true, "lineHeight": 1.25 }, { "type": "text", "version": 722, "versionNonce": 1181998310, "index": "ad", "isDeleted": false, "id": "-5HdWsMbtE_7EnREtXBfc", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 831.1999969482422, "y": -77.17374999999998, "strokeColor": "#1e1e1e", "backgroundColor": "#03a9f4", "width": 241.60000610351562, "height": 75, "seed": 1976381603, "groupIds": [ "hCyHVfABhzmE_8xEhUbGD" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1724587158353, "link": null, "locked": false, "fontSize": 20, "fontFamily": 5, "text": "- SQLite\n- DuckDB\n- Postgres (SQLAlchemy)", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "- SQLite\n- DuckDB\n- Postgres (SQLAlchemy)", "autoResize": true, "lineHeight": 1.25 }, { "type": "rectangle", "version": 900, 
"versionNonce": 1747610221, "index": "adV", "isDeleted": false, "id": "AK1nt4FrnmjinS2uZVZ2g", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1125, "y": -132.32625000000002, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 266.9999999999999, "height": 180, "seed": 961514755, "groupIds": [ "KOc5NUqUBL03A8qlCEUgw" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1724584372798, "link": null, "locked": false }, { "type": "text", "version": 828, "versionNonce": 1440317155, "index": "ae", "isDeleted": false, "id": "yeFldcgggtNS8S44yWdtq", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1218.7916666666667, "y": -129.32625000000002, "strokeColor": "#1e1e1e", "backgroundColor": "#ffeb3b", "width": 79.41666666666667, "height": 35, "seed": 41743523, "groupIds": [ "KOc5NUqUBL03A8qlCEUgw" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1724584372798, "link": null, "locked": false, "fontSize": 28, "fontFamily": 5, "text": "Graph", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Graph", "autoResize": true, "lineHeight": 1.25 }, { "type": "text", "version": 937, "versionNonce": 766285005, "index": "af", "isDeleted": false, "id": "FSHrZ4DMc-ubEPZmgY7M3", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1128.8999938964844, "y": -78.67374999999998, "strokeColor": "#1e1e1e", "backgroundColor": "#ffeb3b", "width": 259.20001220703125, "height": 75, "seed": 2094819395, "groupIds": [ "KOc5NUqUBL03A8qlCEUgw" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1724584372798, "link": null, "locked": false, "fontSize": 20, "fontFamily": 5, "text": "- NetworkX (MessagePack)\n- Postgres (grand-graph)\n", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": 
"- NetworkX (MessagePack)\n- Postgres (grand-graph)\n", "autoResize": true, "lineHeight": 1.25 }, { "type": "rectangle", "version": 784, "versionNonce": 868973283, "index": "afV", "isDeleted": false, "id": "C_7EOPOUjQk0kE44hF9WC", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1423.5, "y": -130.82625000000002, "strokeColor": "#9775fa", "backgroundColor": "#9775fa", "width": 290.9999999999999, "height": 174, "seed": 606086243, "groupIds": [ "-E2Q3MkgggNSCRYPrSvhr" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1724584154381, "link": null, "locked": false }, { "type": "text", "version": 712, "versionNonce": 1558730957, "index": "ag", "isDeleted": false, "id": "nbfo2fj-5l1xNK7r9SnT8", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1520.2333335876465, "y": -127.82625000000002, "strokeColor": "#1e1e1e", "backgroundColor": "#ffeb3b", "width": 97.53333282470703, "height": 35, "seed": 704880643, "groupIds": [ "-E2Q3MkgggNSCRYPrSvhr" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1724584154381, "link": null, "locked": false, "fontSize": 28, "fontFamily": 5, "text": "Scoring", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Scoring", "autoResize": true, "lineHeight": 1.25 }, { "type": "text", "version": 832, "versionNonce": 2104267693, "index": "ah", "isDeleted": false, "id": "6a1UVKWBLqgnaAvPSuSfw", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1431.6750030517578, "y": -76.17374999999998, "strokeColor": "#1e1e1e", "backgroundColor": "#ffeb3b", "width": 274.6499938964844, "height": 75, "seed": 154576803, "groupIds": [ "-E2Q3MkgggNSCRYPrSvhr" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1724584347937, "link": null, "locked": false, "fontSize": 20, "fontFamily": 5, 
"text": "- Local index (MessagePack)\n- Postgres\n", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "- Local index (MessagePack)\n- Postgres\n", "autoResize": true, "lineHeight": 1.25 } ], "appState": { "gridSize": 20, "gridStep": 5, "gridModeEnabled": false, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/further.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "rectangle", "version": 4646, "versionNonce": 1614521653, "isDeleted": false, "id": "8q8ph1hOgu4FaaN7bUDtj", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 570.5486656509088, "y": 207.02732362163974, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 971860443, "groupIds": [ "EGaO0Qgusro1bOGa76Zij" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312379126, "link": null, "locked": false }, { "type": "rectangle", "version": 4695, "versionNonce": 245642395, "isDeleted": false, "id": "NLF6LLOv1mkjsqrOC4cKB", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 563.0012326151946, "y": 200.00723433592552, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 435779195, "groupIds": [ "zukIlM5pM--6gudqqavs2", "EGaO0Qgusro1bOGa76Zij" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312379126, "link": null, "locked": false }, { "type": "rectangle", "version": 4793, "versionNonce": 1146828437, "isDeleted": false, "id": "b0FmtXHqO4i5gHXFgOxF6", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 553.6061433294792, "y": 191.75332808592574, 
"strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 35319579, "groupIds": [ "EGaO0Qgusro1bOGa76Zij" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312379126, "link": null, "locked": false }, { "type": "line", "version": 3917, "versionNonce": 722004283, "isDeleted": false, "id": "XIBG5MqpMzO91s-zg9RV6", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 565.5925699163902, "y": 240.5005651332784, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 46.57983585730082, "height": 3.249953844290203, "seed": 616846267, "groupIds": [ "EGaO0Qgusro1bOGa76Zij" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312379126, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 40.42449133807562, 0.1573930526684746 ], [ 46.57983585730082, -3.0925607916217284 ] ] }, { "type": "line", "version": 3943, "versionNonce": 356621301, "isDeleted": false, "id": "Ak-qKgUN0py0hmrGe0aKl", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 567.4478221544045, "y": 209.03527783443982, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 45.567415680676426, "height": 2.8032978840147194, "seed": 1108878427, "groupIds": [ "EGaO0Qgusro1bOGa76Zij" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312379126, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 16.832548902953302, -2.8032978840147194 ], [ 45.567415680676426, -0.3275477042019195 ] ] }, { "type": "line", "version": 3967, "versionNonce": 5917339, "isDeleted": false, "id": "1CwtdEiRMjK88DqZoUvFn", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, 
"angle": 0, "x": 567.3712226380351, "y": 276.4296521047969, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 48.33668263438425, "height": 4.280657518731036, "seed": 1911087355, "groupIds": [ "EGaO0Qgusro1bOGa76Zij" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375231, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 26.41225578429045, -0.2552319773002338 ], [ 37.62000339651456, 2.3153712935189787 ], [ 48.33668263438425, -1.9652862252120569 ] ] }, { "type": "line", "version": 4004, "versionNonce": 20545685, "isDeleted": false, "id": "EtF5vn14aeWzQBbD8KpKB", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 564.1871754565047, "y": 287.6728005130757, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 54.40694982784246, "height": 2.9096445412231735, "seed": 767101339, "groupIds": [ "EGaO0Qgusro1bOGa76Zij" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375231, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 10.166093050596771, -1.166642430373031 ], [ 16.130660965377448, -0.8422655250909383 ], [ 46.26079588567538, 0.6125567455206506 ], [ 54.40694982784246, -2.297087795702523 ] ] }, { "type": "line", "version": 3970, "versionNonce": 237898203, "isDeleted": false, "id": "ETXFUxT0kF7UCijUSi0-u", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 564.4720183520706, "y": 224.09587938770105, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 448470587, "groupIds": [ "EGaO0Qgusro1bOGa76Zij" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312379126, "link": null, "locked": false, 
"lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "line", "version": 3984, "versionNonce": 1942398453, "isDeleted": false, "id": "lLMO9OdzPiwVIPsiVJjmS", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 564.5795065482209, "y": 258.57920842354054, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 332504795, "groupIds": [ "EGaO0Qgusro1bOGa76Zij" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375231, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 8.093938105125233, 1.4279702913643746 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "rectangle", "version": 4612, "versionNonce": 232169435, "isDeleted": false, "id": "SfdOgKSw2Op0gU02ZRj6h", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 724.2986656509086, "y": 207.02732362163974, "strokeColor": "#ffeb3b", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 1938754427, "groupIds": [ "qd_DFmKp5LE3bMw3Mtfo2" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312375231, "link": null, "locked": false }, { "type": "rectangle", "version": 4661, "versionNonce": 1481121621, "isDeleted": false, "id": "WZq_8J-B599FxSuSWhMOY", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 716.7512326151943, "y": 200.00723433592552, "strokeColor": "#ffeb3b", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 506848283, "groupIds": [ "12jDSGpPx8NMUTpKIVTRd", 
"qd_DFmKp5LE3bMw3Mtfo2" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312375231, "link": null, "locked": false }, { "type": "rectangle", "version": 4759, "versionNonce": 1196158075, "isDeleted": false, "id": "6UAiKf3P2B8oHIqxMXP9B", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 707.3561433294792, "y": 191.75332808592574, "strokeColor": "#ffeb3b", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 1270393019, "groupIds": [ "qd_DFmKp5LE3bMw3Mtfo2" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312375231, "link": null, "locked": false }, { "type": "line", "version": 3883, "versionNonce": 353594549, "isDeleted": false, "id": "PtmQWP_l-8IxINtiwRUhu", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 719.3425699163902, "y": 240.5005651332784, "strokeColor": "#ffeb3b", "backgroundColor": "#fff", "width": 46.57983585730082, "height": 3.249953844290203, "seed": 297392475, "groupIds": [ "qd_DFmKp5LE3bMw3Mtfo2" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375231, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 40.42449133807562, 0.1573930526684746 ], [ 46.57983585730082, -3.0925607916217284 ] ] }, { "type": "line", "version": 3909, "versionNonce": 162440475, "isDeleted": false, "id": "Q3WDbscMJKM8bJF79W-YD", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 721.1978221544043, "y": 209.03527783443982, "strokeColor": "#ffeb3b", "backgroundColor": "#fff", "width": 45.567415680676426, "height": 2.8032978840147194, "seed": 764147195, "groupIds": [ "qd_DFmKp5LE3bMw3Mtfo2" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375231, "link": 
null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 16.832548902953302, -2.8032978840147194 ], [ 45.567415680676426, -0.3275477042019195 ] ] }, { "type": "line", "version": 3934, "versionNonce": 701369877, "isDeleted": false, "id": "BS2q9beMoZf_-aYf5c8vt", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 721.1212226380351, "y": 276.42965210479707, "strokeColor": "#ffeb3b", "backgroundColor": "#fff", "width": 48.33668263438425, "height": 4.280657518731036, "seed": 238124699, "groupIds": [ "qd_DFmKp5LE3bMw3Mtfo2" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375231, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 26.41225578429045, -0.2552319773002338 ], [ 37.62000339651456, 2.3153712935189787 ], [ 48.33668263438425, -1.9652862252120569 ] ] }, { "type": "line", "version": 3971, "versionNonce": 745621947, "isDeleted": false, "id": "oMYU6FaZy7M2x90y9S3Pt", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 717.9371754565047, "y": 287.67280051307563, "strokeColor": "#ffeb3b", "backgroundColor": "#fff", "width": 54.40694982784246, "height": 2.9096445412231735, "seed": 509408059, "groupIds": [ "qd_DFmKp5LE3bMw3Mtfo2" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375231, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 10.166093050596771, -1.166642430373031 ], [ 16.130660965377448, -0.8422655250909383 ], [ 46.26079588567538, 0.6125567455206506 ], [ 54.40694982784246, -2.297087795702523 ] ] }, { "type": "line", "version": 3936, "versionNonce": 91063157, "isDeleted": false, "id": "PHkrZ4_UHjutk9XZElk_e", "fillStyle": "solid", "strokeWidth": 1, 
"strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 718.2220183520706, "y": 224.09587938770105, "strokeColor": "#ffeb3b", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 1316017115, "groupIds": [ "qd_DFmKp5LE3bMw3Mtfo2" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375231, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "line", "version": 3951, "versionNonce": 1958107739, "isDeleted": false, "id": "Dorr3dO30jUkk79PopIf_", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 718.3295065482209, "y": 258.57920842354076, "strokeColor": "#ffeb3b", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 1585422459, "groupIds": [ "qd_DFmKp5LE3bMw3Mtfo2" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375232, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 8.093938105125233, 1.4279702913643746 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "rectangle", "version": 4610, "versionNonce": 989091157, "isDeleted": false, "id": "Jt4NvPMeLg6mhAlXKpPrq", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 878.0486656509083, "y": 207.02732362163974, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 1749801243, "groupIds": [ "oifR3K1EkgRm0woSpwQEe" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312379126, "link": null, "locked": false }, { "type": "rectangle", "version": 4659, 
"versionNonce": 1562777211, "isDeleted": false, "id": "WgPx2gWYtu6RU7xJArs4i", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 870.5012326151941, "y": 200.00723433592552, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 692583867, "groupIds": [ "Tzrs5VR8o2cJk5khXPvuR", "oifR3K1EkgRm0woSpwQEe" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312379126, "link": null, "locked": false }, { "type": "rectangle", "version": 4757, "versionNonce": 13165237, "isDeleted": false, "id": "O11wLzeZnprhHXZl10o-v", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 861.106143329479, "y": 191.75332808592574, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 211193435, "groupIds": [ "oifR3K1EkgRm0woSpwQEe" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312379126, "link": null, "locked": false }, { "type": "line", "version": 3881, "versionNonce": 1991479067, "isDeleted": false, "id": "HZv1jZzJjUlbXWXKpVgPq", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 873.09256991639, "y": 240.5005651332784, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 46.57983585730082, "height": 3.249953844290203, "seed": 1963592443, "groupIds": [ "oifR3K1EkgRm0woSpwQEe" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312379126, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 40.42449133807562, 0.1573930526684746 ], [ 46.57983585730082, -3.0925607916217284 ] ] }, { "type": "line", "version": 3907, "versionNonce": 728024085, "isDeleted": false, "id": "z9PwounZ94sfL3PfhSiQ4", "fillStyle": "solid", 
"strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 874.9478221544041, "y": 209.03527783443982, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 45.567415680676426, "height": 2.8032978840147194, "seed": 1193603995, "groupIds": [ "oifR3K1EkgRm0woSpwQEe" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312379126, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 16.832548902953302, -2.8032978840147194 ], [ 45.567415680676426, -0.3275477042019195 ] ] }, { "type": "line", "version": 3931, "versionNonce": 557651003, "isDeleted": false, "id": "Imd8yI2Lf6IQozPsseCce", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 874.8712226380349, "y": 276.4296521047971, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 48.33668263438425, "height": 4.280657518731036, "seed": 857327675, "groupIds": [ "oifR3K1EkgRm0woSpwQEe" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375232, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 26.41225578429045, -0.2552319773002338 ], [ 37.62000339651456, 2.3153712935189787 ], [ 48.33668263438425, -1.9652862252120569 ] ] }, { "type": "line", "version": 3968, "versionNonce": 1811521781, "isDeleted": false, "id": "ZH0iHUr1LSzlOrGkrE_Bg", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 871.6871754565045, "y": 287.6728005130757, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 54.40694982784246, "height": 2.9096445412231735, "seed": 1053493467, "groupIds": [ "oifR3K1EkgRm0woSpwQEe" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375232, "link": null, "locked": false, 
"lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 10.166093050596771, -1.166642430373031 ], [ 16.130660965377448, -0.8422655250909383 ], [ 46.26079588567538, 0.6125567455206506 ], [ 54.40694982784246, -2.297087795702523 ] ] }, { "type": "line", "version": 3934, "versionNonce": 1028191163, "isDeleted": false, "id": "uboh1NQ170zSPaXGoSESZ", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 871.9720183520703, "y": 224.09587938770105, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 264975739, "groupIds": [ "oifR3K1EkgRm0woSpwQEe" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312379126, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "line", "version": 3948, "versionNonce": 1717811797, "isDeleted": false, "id": "XLAGZPKHS8Xwt91UpHZ_0", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 872.0795065482207, "y": 258.57920842354076, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 1969016347, "groupIds": [ "oifR3K1EkgRm0woSpwQEe" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375232, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 8.093938105125233, 1.4279702913643746 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "rectangle", "version": 4634, "versionNonce": 1854636411, "isDeleted": false, "id": "nZq35Q3L6H0QkeG3Vht31", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", 
"roughness": 1, "opacity": 100, "angle": 0, "x": 1031.7986656509083, "y": 207.02732362163974, "strokeColor": "#7950f2", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 846353083, "groupIds": [ "0RC_0jv9T44Fq-XFPghb3" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312375232, "link": null, "locked": false }, { "type": "rectangle", "version": 4683, "versionNonce": 855877557, "isDeleted": false, "id": "rrNUqmwxf5WDNs8Qq_9z0", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1024.251232615194, "y": 200.00723433592552, "strokeColor": "#7950f2", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 128861019, "groupIds": [ "v87qT3LfamP0fU18OmjI3", "0RC_0jv9T44Fq-XFPghb3" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312375232, "link": null, "locked": false }, { "type": "rectangle", "version": 4781, "versionNonce": 775133723, "isDeleted": false, "id": "hNZNlkh5OnGJBY8xlIc5t", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1014.8561433294788, "y": 191.75332808592574, "strokeColor": "#7950f2", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 2103648251, "groupIds": [ "0RC_0jv9T44Fq-XFPghb3" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312375232, "link": null, "locked": false }, { "type": "line", "version": 3905, "versionNonce": 872343829, "isDeleted": false, "id": "KBAv7GqAYMFEu-wMY1mh0", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1026.8425699163897, "y": 240.5005651332784, "strokeColor": "#7950f2", "backgroundColor": "#fff", "width": 46.57983585730082, "height": 3.249953844290203, "seed": 1838377115, "groupIds": [ "0RC_0jv9T44Fq-XFPghb3" ], "frameId": null, 
"roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375232, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 40.42449133807562, 0.1573930526684746 ], [ 46.57983585730082, -3.0925607916217284 ] ] }, { "type": "line", "version": 3931, "versionNonce": 1049599675, "isDeleted": false, "id": "vBzGi3sNPrEnLnXkuTDui", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1028.697822154404, "y": 209.03527783443982, "strokeColor": "#7950f2", "backgroundColor": "#fff", "width": 45.567415680676426, "height": 2.8032978840147194, "seed": 1307680059, "groupIds": [ "0RC_0jv9T44Fq-XFPghb3" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375232, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 16.832548902953302, -2.8032978840147194 ], [ 45.567415680676426, -0.3275477042019195 ] ] }, { "type": "line", "version": 3956, "versionNonce": 1791688309, "isDeleted": false, "id": "riv_87Cqm5BHH4qwhzob4", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1028.6212226380346, "y": 276.4296521047971, "strokeColor": "#7950f2", "backgroundColor": "#fff", "width": 48.33668263438425, "height": 4.280657518731036, "seed": 970997211, "groupIds": [ "0RC_0jv9T44Fq-XFPghb3" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375232, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 26.41225578429045, -0.2552319773002338 ], [ 37.62000339651456, 2.3153712935189787 ], [ 48.33668263438425, -1.9652862252120569 ] ] }, { "type": "line", "version": 3993, "versionNonce": 977407835, "isDeleted": false, "id": "1xxX3pYb1rx-QZk3jub6u", "fillStyle": "solid", "strokeWidth": 1, 
"strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1025.4371754565043, "y": 287.6728005130757, "strokeColor": "#7950f2", "backgroundColor": "#fff", "width": 54.40694982784246, "height": 2.9096445412231735, "seed": 1014090363, "groupIds": [ "0RC_0jv9T44Fq-XFPghb3" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375232, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 10.166093050596771, -1.166642430373031 ], [ 16.130660965377448, -0.8422655250909383 ], [ 46.26079588567538, 0.6125567455206506 ], [ 54.40694982784246, -2.297087795702523 ] ] }, { "type": "line", "version": 3958, "versionNonce": 237856725, "isDeleted": false, "id": "bU8iYByqD3dp76KNxsOad", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1025.72201835207, "y": 224.09587938770105, "strokeColor": "#7950f2", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 1516815131, "groupIds": [ "0RC_0jv9T44Fq-XFPghb3" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375232, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "line", "version": 3973, "versionNonce": 1134390267, "isDeleted": false, "id": "VY-TfARiaY6LsV7-7rNWC", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1025.8295065482205, "y": 258.57920842354076, "strokeColor": "#7950f2", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 1547151291, "groupIds": [ "0RC_0jv9T44Fq-XFPghb3" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312375232, "link": null, "locked": false, 
"lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 8.093938105125233, 1.4279702913643746 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "rectangle", "version": 4712, "versionNonce": 628244187, "isDeleted": false, "id": "C72Ce5baIVqALdI-P3OM9", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1185.548665650909, "y": 207.0273236216395, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 1476458587, "groupIds": [ "siLS3_RDwEMKtYUwaaxN-" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312381572, "link": null, "locked": false }, { "type": "rectangle", "version": 4761, "versionNonce": 1358557269, "isDeleted": false, "id": "zBGI80VXFpYU7OXFGK6J6", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1178.0012326151943, "y": 200.0072343359253, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 2073507067, "groupIds": [ "oYL8Phei164CeEqvr9yAQ", "siLS3_RDwEMKtYUwaaxN-" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312381572, "link": null, "locked": false }, { "type": "rectangle", "version": 4859, "versionNonce": 1209772923, "isDeleted": false, "id": "cfu4VQtjnsxUMcZwQ5sVW", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1168.6061433294788, "y": 191.75332808592574, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 377281947, "groupIds": [ "siLS3_RDwEMKtYUwaaxN-" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688312381572, "link": null, "locked": false }, { "type": "line", "version": 3983, "versionNonce": 
425926069, "isDeleted": false, "id": "LmvD8cAN6D1U7VGelLV01", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1180.5925699163897, "y": 240.50056513327817, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 46.57983585730082, "height": 3.249953844290203, "seed": 1675915835, "groupIds": [ "siLS3_RDwEMKtYUwaaxN-" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312381572, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 40.42449133807562, 0.1573930526684746 ], [ 46.57983585730082, -3.0925607916217284 ] ] }, { "type": "line", "version": 4009, "versionNonce": 2016314395, "isDeleted": false, "id": "uIr1YsstwF_3qU4vEy2_0", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1182.4478221544048, "y": 209.0352778344396, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 45.567415680676426, "height": 2.8032978840147194, "seed": 347108059, "groupIds": [ "siLS3_RDwEMKtYUwaaxN-" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312381572, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 16.832548902953302, -2.8032978840147194 ], [ 45.567415680676426, -0.3275477042019195 ] ] }, { "type": "line", "version": 4034, "versionNonce": 1258266389, "isDeleted": false, "id": "soobVx_Prpyrf1BH-snOZ", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1182.3712226380346, "y": 276.4296521047969, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 48.33668263438425, "height": 4.280657518731036, "seed": 426991483, "groupIds": [ "siLS3_RDwEMKtYUwaaxN-" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312381572, "link": 
null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 26.41225578429045, -0.2552319773002338 ], [ 37.62000339651456, 2.3153712935189787 ], [ 48.33668263438425, -1.9652862252120569 ] ] }, { "type": "line", "version": 4071, "versionNonce": 496847035, "isDeleted": false, "id": "wrQ8ziTQK2vkQdM0O4J3M", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1179.1871754565043, "y": 287.67280051307546, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 54.40694982784246, "height": 2.9096445412231735, "seed": 745519131, "groupIds": [ "siLS3_RDwEMKtYUwaaxN-" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312381572, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 10.166093050596771, -1.166642430373031 ], [ 16.130660965377448, -0.8422655250909383 ], [ 46.26079588567538, 0.6125567455206506 ], [ 54.40694982784246, -2.297087795702523 ] ] }, { "type": "line", "version": 4036, "versionNonce": 205845621, "isDeleted": false, "id": "yZFGBgkE8nXDy8E9XYuQO", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1179.47201835207, "y": 224.09587938770082, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 462945467, "groupIds": [ "siLS3_RDwEMKtYUwaaxN-" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312381572, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "line", "version": 4051, "versionNonce": 2114615643, "isDeleted": false, "id": "eReA_XFu0cSYQhyG-92nc", "fillStyle": "solid", "strokeWidth": 1, 
"strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1179.5795065482205, "y": 258.57920842354054, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 1385082203, "groupIds": [ "siLS3_RDwEMKtYUwaaxN-" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688312381572, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 8.093938105125233, 1.4279702913643746 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/indexing.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "id": "gu5_xiA8n2ahi-4mERgcR", "type": "rectangle", "x": 1278.0454545454545, "y": 234.17792970816078, "width": 319.99999999999983, "height": 286.9999999999999, "angle": 0, "strokeColor": "#00e676", "backgroundColor": "#00e676", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 60, "groupIds": [], "strokeSharpness": "sharp", "seed": 260819005, "version": 681, "versionNonce": 1445045779, "isDeleted": false, "boundElements": [ { "id": "ZyDOc33tL-6s4vwoFYHAq", "type": "arrow" }, { "id": "5m7yE6eTRKFfSCbA82DrS", "type": "arrow" }, { "id": "-D1yDTCnqxjTFsQU0jBjN", "type": "arrow" } ], "updated": 1641735984230 }, { "type": "line", "version": 5110, "versionNonce": 785100115, "isDeleted": false, "id": "BcQI3V6na893s7vMmTK-q", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1000.4754963515834, "y": 217.20853592127958, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 77.09201683999922, "height": 99.49948667804088, 
"seed": 597539171, "groupIds": [ "ByeZbnpBUniBnZxL1XiMR", "YDUnsOO4AJOXnzopVjTfR" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908376, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 0.2542098813493443, 75.20117273657175 ], [ 0.011896425679918422, 83.76249969444815 ], [ 3.970409367559332, 87.46174320643391 ], [ 17.75573317066317, 90.59250103325854 ], [ 41.05683533152865, 91.56737225214069 ], [ 63.319497586673116, 90.01084754868091 ], [ 75.14781395923075, 86.28844687220405 ], [ 76.81603792670788, 83.15042405259751 ], [ 77.05033394391478, 76.25776215104557 ], [ 76.86643881413028, 6.3089586511537865 ], [ 76.45188016352971, -0.2999144698665015 ], [ 71.50179495549581, -3.9936571317850627 ], [ 61.077971898861186, -6.132877429442784 ], [ 37.32348754161154, -7.932114425900202 ], [ 18.278415656797975, -6.859225353587373 ], [ 3.2995959613238286, -3.2201165291205287 ], [ -0.04168289608444441, -0.045185660461322996 ], [ 0, 0 ] ] }, { "type": "line", "version": 2808, "versionNonce": 923714909, "isDeleted": false, "id": "UFrJE_4GqAUPwzMDMeN7A", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 998.9994257898584, "y": 272.3348195790094, "strokeColor": "#03a9f4", "backgroundColor": "transparent", "width": 77.17198221193564, "height": 8.562348957853038, "seed": 960447053, "groupIds": [ "ByeZbnpBUniBnZxL1XiMR", "YDUnsOO4AJOXnzopVjTfR" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908377, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 2.033150371639873, 3.413095389435587 ], [ 10.801287372573954, 6.276651055277943 ], [ 22.468666942209353, 8.010803051612635 ], [ 40.747074201802775, 8.168828515515864 ], [ 62.077348233027564, 7.0647721921469495 ], [ 74.53446931782398, 3.04824021069218 ], [ 
77.17198221193564, -0.3935204423371723 ] ] }, { "type": "line", "version": 2894, "versionNonce": 439388915, "isDeleted": false, "id": "fqsQ_Qaf-7qXlNZOtktUp", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 997.9675684673548, "y": 243.3707642437863, "strokeColor": "#03a9f4", "backgroundColor": "transparent", "width": 77.17198221193564, "height": 8.562348957853038, "seed": 1862702339, "groupIds": [ "ByeZbnpBUniBnZxL1XiMR", "YDUnsOO4AJOXnzopVjTfR" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908377, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 2.033150371639873, 3.413095389435587 ], [ 10.801287372573954, 6.276651055277943 ], [ 22.468666942209353, 8.010803051612635 ], [ 40.747074201802775, 8.168828515515864 ], [ 62.077348233027564, 7.0647721921469495 ], [ 74.53446931782398, 3.04824021069218 ], [ 77.17198221193564, -0.3935204423371723 ] ] }, { "type": "ellipse", "version": 5897, "versionNonce": 1193808317, "isDeleted": false, "id": "4l6IrZqyWS_r_TywqVTEW", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 996.9096194852727, "y": 208.07626729559803, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 76.59753601865496, "height": 15.49127539284798, "seed": 1511747757, "groupIds": [ "ByeZbnpBUniBnZxL1XiMR", "YDUnsOO4AJOXnzopVjTfR" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "bxuMGTzXLn7H-uBCptINx" } ], "updated": 1641735908377 }, { "type": "ellipse", "version": 1298, "versionNonce": 745059475, "isDeleted": false, "id": "vrSWcPDbYZk3CuUPgODtk", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1033.4016065904952, "y": 232.55293497550144, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 11.226103154161754, 
"height": 12.183758484455605, "seed": 2081619107, "groupIds": [ "ByeZbnpBUniBnZxL1XiMR", "YDUnsOO4AJOXnzopVjTfR" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641735908377 }, { "type": "ellipse", "version": 1586, "versionNonce": 857264669, "isDeleted": false, "id": "ptDnFZABr-xnrQYrkdvCj", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1033.7527483744288, "y": 258.76954926256656, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 11.226103154161754, "height": 12.183758484455605, "seed": 1076024077, "groupIds": [ "ByeZbnpBUniBnZxL1XiMR", "YDUnsOO4AJOXnzopVjTfR" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641735908377 }, { "type": "ellipse", "version": 1470, "versionNonce": 1160423987, "isDeleted": false, "id": "2v5HdhgHuudJY2EmM0hV_", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 6.239590202363168, "x": 1034.4016065904952, "y": 287.53827583387294, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 11.226103154161754, "height": 12.183758484455605, "seed": 1718710339, "groupIds": [ "ByeZbnpBUniBnZxL1XiMR", "YDUnsOO4AJOXnzopVjTfR" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641735908377 }, { "type": "text", "version": 975, "versionNonce": 2010285693, "isDeleted": false, "id": "YJIBcqHtzJtKcCgFzxnQV", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 989.625, "y": 310.0375083418067, "strokeColor": "#03a9f4", "backgroundColor": "transparent", "width": 94, "height": 46, "seed": 131000685, "groupIds": [ "YDUnsOO4AJOXnzopVjTfR" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641735908377, "fontSize": 17.4778970902999, "fontFamily": 1, "text": "Structured\nDatabase", "baseline": 39, "textAlign": "center", "verticalAlign": "top", "containerId": null, "originalText": 
"Structured\nDatabase" }, { "type": "rectangle", "version": 1600, "versionNonce": 157084627, "isDeleted": false, "id": "YI607mwBteKrjS78gtTXD", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 50, "angle": 0, "x": 1281.9090909090908, "y": 473.0000000000002, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 315.00000000000006, "height": 40, "seed": 928276467, "groupIds": [], "strokeSharpness": "round", "boundElements": [ { "id": "-D1yDTCnqxjTFsQU0jBjN", "type": "arrow" } ], "updated": 1641735908377 }, { "type": "rectangle", "version": 1299, "versionNonce": 2014752477, "isDeleted": false, "id": "UsRZ6Y682zf4FIZyNDOtq", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 50, "angle": 0, "x": 1280.9090909090908, "y": 238.6363636363636, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 316.1818181818184, "height": 47.00000000000002, "seed": 1663520371, "groupIds": [], "strokeSharpness": "round", "boundElements": [ { "id": "5m7yE6eTRKFfSCbA82DrS", "type": "arrow" }, { "id": "ZyDOc33tL-6s4vwoFYHAq", "type": "arrow" } ], "updated": 1641735908377 }, { "type": "line", "version": 6222, "versionNonce": 1024850291, "isDeleted": false, "id": "qtt-6VisYFKR2M8Cu45p-", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1002.5946179731507, "y": 439.26150708178005, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 76.99810389727404, "height": 99.37827711605759, "seed": 89125379, "groupIds": [ "XwpXytlIickMBY-WleyOp", "fZX3gzhfhBW0U-bYngONa", "q5UWDdBSxh3UYgBDbSFBQ" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908378, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 0.25390020469272123, 75.10956320658954 ], [ 0.011881933539366223, 83.66046081728857 ], [ 3.9655726433067375, 
87.35519793732486 ], [ 17.73410326369428, 90.48214189738947 ], [ 41.00682018880676, 91.45582553513545 ], [ 63.24236222825349, 89.90119697892054 ], [ 75.05626943052894, 86.18333090428511 ], [ 76.72246117951802, 83.04913080160064 ], [ 76.95647177899504, 76.16486549140681 ], [ 76.77280066894052, 6.301273122914302 ], [ 76.35874703071867, -0.299549116207539 ], [ 71.41469198102895, -3.9887920872726523 ], [ 61.003567150978974, -6.125406402086429 ], [ 37.27802033642641, -7.922451580922154 ], [ 18.256149021768273, -6.850869494392455 ], [ 3.2955764171578545, -3.2161938062295934 ], [ -0.04163211827899763, -0.0451306156134037 ], [ 0, 0 ] ] }, { "type": "ellipse", "version": 6957, "versionNonce": 1115081533, "isDeleted": false, "id": "xmFIoJeyaqzUfiLrt7yo1", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1002.1009502312106, "y": 430.490580458146, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 76.50422544892463, "height": 15.472404032124233, "seed": 761422765, "groupIds": [ "XwpXytlIickMBY-WleyOp", "fZX3gzhfhBW0U-bYngONa", "q5UWDdBSxh3UYgBDbSFBQ" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "bxuMGTzXLn7H-uBCptINx" } ], "updated": 1641735908378 }, { "type": "text", "version": 2153, "versionNonce": 1570525971, "isDeleted": false, "id": "drtrHswdiUqZPuNgKc90d", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1020.1035285928494, "y": 465.17753663299726, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 18, "height": 36, "seed": 455653795, "groupIds": [ "fZX3gzhfhBW0U-bYngONa", "q5UWDdBSxh3UYgBDbSFBQ" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908378, "fontSize": 29.219434366479078, "fontFamily": 3, "text": "<", "baseline": 29, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "<" }, { "type": "text", "version": 2133, 
"versionNonce": 391404445, "isDeleted": false, "id": "Tof7f8O_qqAbmeVWbwNel", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1042.114401938595, "y": 465.6542466049163, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 18, "height": 36, "seed": 336815629, "groupIds": [ "fZX3gzhfhBW0U-bYngONa", "q5UWDdBSxh3UYgBDbSFBQ" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908378, "fontSize": 29.384515916572173, "fontFamily": 3, "text": ">", "baseline": 29, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": ">" }, { "type": "text", "version": 1428, "versionNonce": 2070313139, "isDeleted": false, "id": "VzdNhewfEay7ILpboy3kh", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 877.8181818181818, "y": 531.2795921207232, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 324, "height": 48, "seed": 1976012099, "groupIds": [ "q5UWDdBSxh3UYgBDbSFBQ" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641735908378, "fontSize": 18.16360832146819, "fontFamily": 1, "text": "Approximate Nearest Neighbor (ANN)\nIndex", "baseline": 41, "textAlign": "center", "verticalAlign": "top", "containerId": null, "originalText": "Approximate Nearest Neighbor (ANN)\nIndex" }, { "type": "rectangle", "version": 4844, "versionNonce": 786489341, "isDeleted": false, "id": "J8c0oXxyYGwX5-DrgmaWs", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 594.6310358331666, "y": 328.3910028343104, "strokeColor": "#343a40", "backgroundColor": "#fff", "width": 63.17951567316358, "height": 95.87140433060104, "seed": 982009293, "groupIds": [ "BldQyeSIaH-oKkL4FMf2Q" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": 
"arrow", "id": "EndiSTFlx1AT7vcBVjgve" }, { "id": "eDQoIxWK3cF_GuzA8jGlK", "type": "arrow" }, { "id": "UldaTf_SD8SLGi5U0s3az", "type": "arrow" } ], "updated": 1641735908378 }, { "type": "rectangle", "version": 4895, "versionNonce": 1411050067, "isDeleted": false, "id": "m9UEIWpFo5o-W_PZIjS5i", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 587.8843926619461, "y": 322.1157517367904, "strokeColor": "#343a40", "backgroundColor": "#fff", "width": 63.17951567316358, "height": 95.87140433060104, "seed": 1164020099, "groupIds": [ "OQ3HaFukpFi0eOGW5R5UG", "BldQyeSIaH-oKkL4FMf2Q" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" }, { "id": "eDQoIxWK3cF_GuzA8jGlK", "type": "arrow" }, { "id": "UldaTf_SD8SLGi5U0s3az", "type": "arrow" } ], "updated": 1641735908378 }, { "type": "rectangle", "version": 4989, "versionNonce": 1058920541, "isDeleted": false, "id": "7d9_4d4sJrQ9DvGC8ybba", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 579.4861313362021, "y": 314.7375928350884, "strokeColor": "#343a40", "backgroundColor": "#fff", "width": 63.17951567316358, "height": 95.87140433060104, "seed": 1652032557, "groupIds": [ "BldQyeSIaH-oKkL4FMf2Q" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" } ], "updated": 1641735908378 }, { "type": "line", "version": 4113, "versionNonce": 747644915, "isDeleted": false, "id": "sbNllVyO-9z2sQtdPITdl", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 590.173416729491, "y": 358.1987100051989, "strokeColor": "#343a40", "backgroundColor": "#fff", "width": 
41.63767071747142, "height": 2.905130632707587, "seed": 605774115, "groupIds": [ "BldQyeSIaH-oKkL4FMf2Q" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908379, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 36.135414139555195, 0.14069349922795835 ], [ 41.63767071747142, -2.7644371334796283 ] ] }, { "type": "line", "version": 4136, "versionNonce": 1104987325, "isDeleted": false, "id": "eUkNzCc4WrlKLnap75eGL", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 591.7725612633524, "y": 330.1315411841826, "strokeColor": "#343a40", "backgroundColor": "#fff", "width": 40.73266929000497, "height": 2.5058652970606046, "seed": 1184953997, "groupIds": [ "BldQyeSIaH-oKkL4FMf2Q" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908379, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 15.046599363382079, -2.5058652970606046 ], [ 40.73266929000497, -0.2927945794743634 ] ] }, { "type": "line", "version": 4162, "versionNonce": 1791799699, "isDeleted": false, "id": "RfDT1TO01chDwfw5Cbtzd", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 591.7870401126618, "y": 390.3595353798012, "strokeColor": "#343a40", "backgroundColor": "#fff", "width": 43.20811437101611, "height": 3.8264756613833097, "seed": 687424707, "groupIds": [ "BldQyeSIaH-oKkL4FMf2Q" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908379, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 23.609890181258436, -0.22815162036966638 ], [ 33.628485051192264, 2.0697081845366485 ], [ 43.20811437101611, -1.7567674768466617 ] ] }, { "type": "line", "version": 4198, "versionNonce": 2059347229, "isDeleted": false, "id": "eWFd9AM2v-w_xidHMXRZY", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": 
"solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 588.8193171598485, "y": 400.3650618336177, "strokeColor": "#343a40", "backgroundColor": "#fff", "width": 48.6343204088089, "height": 2.600928472214653, "seed": 1908186349, "groupIds": [ "BldQyeSIaH-oKkL4FMf2Q" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908379, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 9.087460853677031, -1.042860552572967 ], [ 14.419182407365316, -0.7529003472200484 ], [ 41.35248119200923, 0.5475638888872795 ], [ 48.6343204088089, -2.053364583327373 ] ] }, { "type": "line", "version": 4165, "versionNonce": 1879508787, "isDeleted": false, "id": "x-HvHTslIzJHS5qwLLpGG", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 589.2138897559879, "y": 343.5358653753724, "strokeColor": "#343a40", "backgroundColor": "#fff", "width": 41.94947793197669, "height": 2.2130707175862407, "seed": 194622563, "groupIds": [ "BldQyeSIaH-oKkL4FMf2Q" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908379, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 16.263408005353806, -0.5285512538564915 ], [ 41.94947793197669, 1.6845194637297491 ] ] }, { "type": "line", "version": 4180, "versionNonce": 1026980221, "isDeleted": false, "id": "0lRPNpWYP4Azt2euXF2xI", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 589.1700215474542, "y": 374.3250106316369, "strokeColor": "#343a40", "backgroundColor": "#fff", "width": 41.94947793197669, "height": 2.2130707175862407, "seed": 1532262221, "groupIds": [ "BldQyeSIaH-oKkL4FMf2Q" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908379, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 7.2351635300143835, 1.2764612775431747 ], [ 
16.263408005353806, -0.5285512538564915 ], [ 41.94947793197669, 1.6845194637297491 ] ] }, { "type": "text", "version": 470, "versionNonce": 849717459, "isDeleted": false, "id": "RZJSTAnmzzhRhyGEUMHmo", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 557, "y": 432, "strokeColor": "#343a40", "backgroundColor": "transparent", "width": 117, "height": 26, "seed": 705534605, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908379, "fontSize": 20, "fontFamily": 1, "text": "Input Data", "baseline": 18, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Input Data" }, { "type": "diamond", "version": 1413, "versionNonce": 1921270237, "isDeleted": false, "id": "TVvWBG3ghORc3yVIw7lIh", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 783.6763173734828, "y": 422.8472597163642, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 73.64736525303452, "height": 24.04199580676619, "seed": 1762684669, "groupIds": [ "sgYs6pusUBDCv2FmxIFXR" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908379 }, { "type": "diamond", "version": 1455, "versionNonce": 406787699, "isDeleted": false, "id": "Z9ZwzURs4p74Wb1ozju71", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 783.6763173734828, "y": 417.04700672905824, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 73.64736525303452, "height": 24.04199580676619, "seed": 1250133853, "groupIds": [ "sgYs6pusUBDCv2FmxIFXR" ], "strokeSharpness": "round", "boundElements": [ { "id": "CjwxjqXOKYE1zbZ-NBw5A", "type": "arrow" } ], "updated": 1641735908380 }, { "type": "diamond", "version": 1536, "versionNonce": 1606024765, "isDeleted": false, "id": "1eOQN_-IrMENzxR2quYRb", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", 
"roughness": 1, "opacity": 100, "angle": 0, "x": 783.6763173734828, "y": 408.7893130516918, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 73.64736525303452, "height": 24.04199580676619, "seed": 1387566013, "groupIds": [ "sgYs6pusUBDCv2FmxIFXR" ], "strokeSharpness": "round", "boundElements": [ { "id": "UldaTf_SD8SLGi5U0s3az", "type": "arrow" }, { "id": "CjwxjqXOKYE1zbZ-NBw5A", "type": "arrow" } ], "updated": 1641735908380 }, { "type": "diamond", "version": 1584, "versionNonce": 261738515, "isDeleted": false, "id": "UO_8s06e5WPXOXcLW1Gji", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 783.6763173734828, "y": 400.5962705882732, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 73.64736525303452, "height": 24.04199580676619, "seed": 1573196829, "groupIds": [ "sgYs6pusUBDCv2FmxIFXR" ], "strokeSharpness": "round", "boundElements": [ { "id": "UldaTf_SD8SLGi5U0s3az", "type": "arrow" }, { "id": "CjwxjqXOKYE1zbZ-NBw5A", "type": "arrow" } ], "updated": 1641735908380 }, { "type": "text", "version": 595, "versionNonce": 733556381, "isDeleted": false, "id": "Rc3sIwmWEVTP-seViLAF-", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 776, "y": 456, "strokeColor": "#ff7043", "backgroundColor": "#ffeb3b", "width": 90, "height": 26, "seed": 1267515533, "groupIds": [], "strokeSharpness": "round", "boundElements": [ { "id": "CjwxjqXOKYE1zbZ-NBw5A", "type": "arrow" } ], "updated": 1641735908380, "fontSize": 20, "fontFamily": 1, "text": "Vectorize", "baseline": 18, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Vectorize" }, { "type": "arrow", "version": 2357, "versionNonce": 1789679027, "isDeleted": false, "id": "UldaTf_SD8SLGi5U0s3az", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 662.8742386138015, "y": 
381.70953615760675, "strokeColor": "#ff7043", "backgroundColor": "#ffeb3b", "width": 115.84862824570905, "height": 39.38975678911993, "seed": 1103614243, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908380, "startBinding": { "elementId": "m9UEIWpFo5o-W_PZIjS5i", "focus": -0.05280483881151566, "gap": 11.810330278691765 }, "endBinding": { "elementId": "UO_8s06e5WPXOXcLW1Gji", "focus": -1.8872552019606, "gap": 9.60046258817383 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 115.84862824570905, 39.38975678911993 ] ] }, { "type": "arrow", "version": 852, "versionNonce": 624120573, "isDeleted": false, "id": "eDQoIxWK3cF_GuzA8jGlK", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 664.8519551997895, "y": 376.6331008210118, "strokeColor": "#03a9f4", "backgroundColor": "#ffeb3b", "width": 330.68241691348294, "height": 116.55682104186076, "seed": 1088931821, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908380, "startBinding": { "elementId": "m9UEIWpFo5o-W_PZIjS5i", "focus": 0.3821910694005074, "gap": 13.788046864679757 }, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 330.68241691348294, -116.55682104186076 ] ] }, { "type": "arrow", "version": 963, "versionNonce": 2023565139, "isDeleted": false, "id": "CjwxjqXOKYE1zbZ-NBw5A", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 863.6765930965225, "y": 445.6574693739797, "strokeColor": "#ff7043", "backgroundColor": "#ffeb3b", "width": 137.0993469797395, "height": 39.74595185737945, "seed": 1654741539, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908380, "startBinding": { "elementId": "Rc3sIwmWEVTP-seViLAF-", "focus": -1.3712294571679748, "gap": 
10.342530626020277 }, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 137.0993469797395, 39.74595185737945 ] ] }, { "type": "text", "version": 879, "versionNonce": 1219095389, "isDeleted": false, "id": "jTu74ufZGB6NCYWR8F6cX", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1409.561741267423, "y": 407.263410502541, "strokeColor": "#343a40", "backgroundColor": "#868e96", "width": 65, "height": 25, "seed": 1925987693, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908381, "fontSize": 20, "fontFamily": 1, "text": "Search", "baseline": 18, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "" }, { "type": "rectangle", "version": 1230, "versionNonce": 1606599923, "isDeleted": false, "id": "94RaVBybGnyM0x9Gqgwk5", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0.7882111358327366, "x": 1433.0842846752632, "y": 375.5724519986199, "strokeColor": "#000000", "backgroundColor": "#868e96", "width": 70.38608542855415, "height": 6.900596610642562, "seed": 489839587, "groupIds": [ "_mSpNqaGUToycQhKOY82U" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908381 }, { "type": "ellipse", "version": 1066, "versionNonce": 107902909, "isDeleted": false, "id": "x9CPx10TrNm9T_lgZgUAM", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1401.9154962081336, "y": 314.9184076792772, "strokeColor": "#000000", "backgroundColor": "#868e96", "width": 66.0077798878363, "height": 66.0077798878363, "seed": 1251739597, "groupIds": [ "_mSpNqaGUToycQhKOY82U" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908381 }, { "type": "ellipse", "version": 1153, "versionNonce": 933473939, "isDeleted": false, "id": "EcCeRt1xfAokj2Az9b8OW", "fillStyle": 
"solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1411.809595977187, "y": 325.8539916345475, "strokeColor": "#000000", "backgroundColor": "white", "width": 47.35405419542672, "height": 47.35405419542672, "seed": 1122166659, "groupIds": [ "_mSpNqaGUToycQhKOY82U" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908381 }, { "type": "text", "version": 1087, "versionNonce": 1218607133, "isDeleted": false, "id": "haJCsmkSJ7-AvLSKoAVa0", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1293.0909090909092, "y": 248.72727272727272, "strokeColor": "#343a40", "backgroundColor": "#fd7e14", "width": 297, "height": 26, "seed": 1064787981, "groupIds": [], "strokeSharpness": "round", "boundElements": [ { "id": "5m7yE6eTRKFfSCbA82DrS", "type": "arrow" }, { "id": "ZyDOc33tL-6s4vwoFYHAq", "type": "arrow" } ], "updated": 1641735908381, "fontSize": 20, "fontFamily": 1, "text": "SQL> SELECT {} FROM txtai", "baseline": 18, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "SQL> SELECT {} FROM txtai" }, { "type": "text", "version": 1452, "versionNonce": 1660314675, "isDeleted": false, "id": "M5hw4wHqIGw3PXnaLCp6p", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1307.1818181818182, "y": 478.18181818181836, "strokeColor": "#343a40", "backgroundColor": "#fd7e14", "width": 261, "height": 26, "seed": 1708000109, "groupIds": [], "strokeSharpness": "round", "boundElements": [ { "id": "-D1yDTCnqxjTFsQU0jBjN", "type": "arrow" } ], "updated": 1641735908381, "fontSize": 20, "fontFamily": 1, "text": "> Natural Language Query", "baseline": 18, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "> Natural Language Query" }, { "type": "arrow", "version": 1819, "versionNonce": 959933565, "isDeleted": false, "id": 
"5m7yE6eTRKFfSCbA82DrS", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1272.868226475393, "y": 268.6451583650937, "strokeColor": "#ff7043", "backgroundColor": "#7950f2", "width": 188.8189376264454, "height": 219.36950558832018, "seed": 1269992291, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908382, "startBinding": { "elementId": "UsRZ6Y682zf4FIZyNDOtq", "focus": 0.900241545557177, "gap": 8.040864433697834 }, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": "arrow", "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ -188.8189376264454, 219.36950558832018 ] ] }, { "type": "arrow", "version": 1276, "versionNonce": 698443219, "isDeleted": false, "id": "ZyDOc33tL-6s4vwoFYHAq", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1077.7543216412146, "y": 263.29298489476895, "strokeColor": "#03a9f4", "backgroundColor": "#7950f2", "width": 199.76731534744795, "height": 0.40309918148193447, "seed": 343568205, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908382, "startBinding": null, "endBinding": { "elementId": "haJCsmkSJ7-AvLSKoAVa0", "focus": -0.17292767152914681, "gap": 15.569272102246714 }, "lastCommittedPoint": null, "startArrowhead": "arrow", "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 199.76731534744795, 0.40309918148193447 ] ] }, { "type": "arrow", "version": 1786, "versionNonce": 1206834397, "isDeleted": false, "id": "-D1yDTCnqxjTFsQU0jBjN", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1275.2935337071021, "y": 493.40248820420277, "strokeColor": "#ff7043", "backgroundColor": "#7950f2", "width": 193.34252933148332, "height": 3.1975454283124236, "seed": 855220461, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1641735908382, "startBinding": { 
"elementId": "YI607mwBteKrjS78gtTXD", "focus": -0.13787667355971328, "gap": 6.615557201988622 }, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": "arrow", "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ -193.34252933148332, -3.1975454283124236 ] ] } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/install.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "rectangle", "version": 1435, "versionNonce": 1860045045, "isDeleted": false, "id": "LgVOdCQnXRH2GYHIGLwOx", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 1.5707963267948957, "x": 764, "y": 123.99999999999991, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 20.000000000000092, "height": 199.99999999999991, "seed": 782310408, "groupIds": [ "s5mCNZzJxsETmWhZoD3qR" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688311151676, "link": null, "locked": false }, { "type": "rectangle", "version": 1534, "versionNonce": 2090527963, "isDeleted": false, "id": "GG_mFtlk6nEiYCQ8vhzjZ", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 1.5707963267948957, "x": 953, "y": 136.9999999999999, "strokeColor": "#ced4da", "backgroundColor": "#ced4da", "width": 17.99999999999998, "height": 173.99999999999994, "seed": 810177544, "groupIds": [ "Rezl6DUXkEY8GT9kuDOQX" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688311151677, "link": null, "locked": false }, { "type": "text", "version": 293, "versionNonce": 1618682453, "isDeleted": false, "id": "NLfZWFLgvJj3ccuF5yxD8", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 793, "y": 216.86805555555557, "strokeColor": 
"#343a40", "backgroundColor": "transparent", "width": 80, "height": 16, "seed": 297813880, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688311151677, "link": null, "locked": false, "fontSize": 12.242424242424246, "fontFamily": 1, "text": "Installing.......", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Installing.......", "lineHeight": 1.3069306930693065, "baseline": 11 } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/llm.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "text", "version": 1159, "versionNonce": 947191573, "isDeleted": false, "id": "yF7ftUwr3mnAwOlC59RMi", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1475.7768832949857, "y": 232.08688631142672, "strokeColor": "#03a9f4", "backgroundColor": "transparent", "width": 268.6666564941406, "height": 33.6, "seed": 376471794, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691335913281, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Workflow Processing", "textAlign": "center", "verticalAlign": "top", "containerId": null, "originalText": "Workflow Processing", "lineHeight": 1.2, "baseline": 24 }, { "type": "line", "version": 1539, "versionNonce": 1192947061, "isDeleted": false, "id": "yVN5CxzfpwAZ0J0GEsEim", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1595.7169087648438, "y": 44.55863019278951, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 170.54791263920666, "height": 165.88781924917737, "seed": 1276926237, "groupIds": [ "ZD4Gm6iTkUS0-nxoYPoXN" ], "frameId": null, "roundness": null, "boundElements": [], 
"updated": 1691335877136, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 0, 0 ], [ -4.396962054402082, 1.1646874339146507 ], [ -63.9402715195358, 29.56596636773484 ], [ -63.9402715195358, 29.56596636773484 ], [ -67.88778686931518, 32.68804950762777 ], [ -70.0842642471441, 37.216572224302446 ], [ -84.65888566945969, 100.95916519983551 ], [ -84.65888566945969, 100.95916519983551 ], [ -84.76907687141536, 105.46565159474407 ], [ -83.05610475455529, 109.6515846607256 ], [ -82.41499236471047, 110.54913805780339 ], [ -41.27695016258076, 161.6992265392742 ], [ -41.27695016258076, 161.6992265392742 ], [ -37.31206815320823, 164.8012680903417 ], [ -32.397540410633006, 165.88781924917737 ], [ 33.58360912709668, 165.88781924917697 ], [ 33.58360912709668, 165.88781924917697 ], [ 38.509483664717834, 164.7592029175085 ], [ 42.47369821555802, 161.6511389543416 ], [ 83.590366618572, 110.49037113635723 ], [ 83.590366618572, 110.49037113635723 ], [ 85.77883576779129, 105.97187048787097 ], [ 85.7274057950955, 100.94848586332202 ], [ 71.05662649840329, 37.1524605394965 ], [ 71.05662649840329, 37.1524605394965 ], [ 68.85613290539939, 32.62393782282196 ], [ 64.91262485436425, 29.50185468292866 ], [ 5.465486737767179, 1.1700299681670856 ], [ 5.465486737767179, 1.1700299681670856 ], [ -0.053422158085595584, -3.469446951953614e-18 ], [ 0, 0 ] ] }, { "type": "line", "version": 4960, "versionNonce": 2121044059, "isDeleted": false, "id": "aEfZn91vFxUsNoiS_231h", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1881.7866460270468, "y": -65.38351814097682, "strokeColor": "#ff7043", "backgroundColor": "#fa5252", "width": 84.50536343081912, "height": 109.06758737884206, "seed": 1361500846, "groupIds": [ "YlFNaYy8uxiVBIOqH078l" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 
1691335877136, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 0.2786552913217619, 82.43269138641281 ], [ 0.013040413480105428, 91.8172953399838 ], [ 4.352213112689576, 95.87226666144052 ], [ 19.46316552700477, 99.30408539980334 ], [ 45.00495554826318, 100.37270248918188 ], [ 69.40844688139491, 98.66649877125488 ], [ 82.37420150566703, 94.58614343882556 ], [ 84.20284574710968, 91.14636108917688 ], [ 84.45967221106837, 83.5908848820192 ], [ 84.25809330040309, 6.915642702567624 ], [ 83.80366973142185, -0.3287549387484745 ], [ 78.37757288423978, -4.377696435676519 ], [ 66.95137090060692, -6.722629103443413 ], [ 40.91260040265366, -8.694884889660191 ], [ 20.036110369548158, -7.518824323429054 ], [ 3.616892738261588, -3.529770380085963 ], [ -0.04569121975075156, -0.049530818049170484 ], [ 0, 0 ] ] }, { "type": "line", "version": 2704, "versionNonce": 1487369941, "isDeleted": false, "id": "JSkCiTP6nfh3_0WXfktLt", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1887.3924109908396, "y": -3.2136630318863695, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 78.18555964094003, "height": 8.95436195346155, "seed": 1565257522, "groupIds": [ "YlFNaYy8uxiVBIOqH078l" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 2.228662812409231, 3.741306533812953 ], [ 11.83996414096264, 6.880228333571024 ], [ 24.62930590722697, 8.7811403955643 ], [ 44.66540707207591, 8.95436195346155 ], [ 68.04684957381593, 7.744137021248128 ], [ 78.18555964094003, 5.099524329682907 ], [ 76.97433252487917, 5.429165362754105 ] ] }, { "type": "line", "version": 2790, "versionNonce": 
13618427, "isDeleted": false, "id": "XbZa4_v84aJkwUbHNYtWo", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1881.5978130909527, "y": -29.55045127271626, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 84.59301845781613, "height": 9.385724231428382, "seed": 1628656878, "groupIds": [ "YlFNaYy8uxiVBIOqH078l" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 2.228662812409231, 3.7413065338129523 ], [ 11.83996414096264, 6.880228333571017 ], [ 24.62930590722697, 8.7811403955643 ], [ 44.66540707207591, 8.95436195346155 ], [ 68.04684957381593, 7.744137021248123 ], [ 81.70187622537257, 3.341366037466631 ], [ 84.59301845781613, -0.43136227796683113 ] ] }, { "type": "ellipse", "version": 5809, "versionNonce": 170997243, "isDeleted": false, "id": "WgZ_UNGw6chJCNh_YqBCg", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1883.146078835665, "y": -74.22048622222516, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 83.96333219036016, "height": 16.980952253415385, "seed": 1814771954, "groupIds": [ "YlFNaYy8uxiVBIOqH078l" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691336141044, "link": null, "locked": false }, { "type": "ellipse", "version": 1151, "versionNonce": 1596584437, "isDeleted": false, "id": "5zDzUgYAzqCuHz5y0fEbO", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1949.5714949045796, "y": -45.83125704870746, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 12.305631190363249, "height": 13.3553768715004, "seed": 704022318, "groupIds": [ "YlFNaYy8uxiVBIOqH078l" ], 
"frameId": null, "roundness": null, "boundElements": [], "updated": 1691336146522, "link": null, "locked": false }, { "type": "ellipse", "version": 1200, "versionNonce": 1743051285, "isDeleted": false, "id": "VtzlusDzgSLcDDBt5hG2x", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1949.5714949045796, "y": -16.516229929297197, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 12.305631190363249, "height": 13.3553768715004, "seed": 2080066226, "groupIds": [ "YlFNaYy8uxiVBIOqH078l" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691336152276, "link": null, "locked": false }, { "type": "ellipse", "version": 1253, "versionNonce": 1752124155, "isDeleted": false, "id": "B_0DhVcOs-USQL4XV0QRo", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1949.5714949045796, "y": 15.345309440526933, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 12.305631190363249, "height": 13.3553768715004, "seed": 1278833006, "groupIds": [ "YlFNaYy8uxiVBIOqH078l" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691336157480, "link": null, "locked": false }, { "type": "text", "version": 1070, "versionNonce": 652075765, "isDeleted": false, "id": "4zMdwTCbTrrgg-ITOMXC0", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1779.4509372464736, "y": 51.50880129297349, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 298.8666687011719, "height": 33.6, "seed": 1848588206, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [ { "id": "leDc6Y5qdU0OH3j90C072", "type": "arrow" } ], "updated": 1691336802938, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Embeddings Database", "textAlign": "center", "verticalAlign": "top", "containerId": null, "originalText": 
"Embeddings Database", "lineHeight": 1.2, "baseline": 24 }, { "type": "arrow", "version": 3186, "versionNonce": 1016072923, "isDeleted": false, "id": "iPTphyfJkvqyMptaZS0ge", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1878.6382080826127, "y": -7.322815845013395, "strokeColor": "#ff7043", "backgroundColor": "#7950f2", "width": 202.4871709755887, "height": 104.80578637477413, "seed": 1965679388, "groupIds": [], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": "arrow", "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ -202.4871709755887, 104.80578637477413 ] ] }, { "type": "text", "version": 906, "versionNonce": 1278878741, "isDeleted": false, "id": "YCEwpQiKAjRS3zIG3_E8s", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1847.5109174458237, "y": 281.40729511478503, "strokeColor": "#7950f2", "backgroundColor": "transparent", "width": 159.76666259765625, "height": 33.6, "seed": 676249636, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691335908714, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "LLM Models", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "LLM Models", "lineHeight": 1.2, "baseline": 24 }, { "type": "arrow", "version": 3832, "versionNonce": 1540672693, "isDeleted": false, "id": "leDc6Y5qdU0OH3j90C072", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1828.6470213908196, "y": 222.85602296508534, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 145.94742162238094, "height": 61.13637008509096, "seed": 241264412, "groupIds": [], "frameId": null, "roundness": { "type": 2 }, 
"boundElements": [], "updated": 1691335877141, "link": null, "locked": false, "startBinding": { "elementId": "ZTKEryJuyeKfeG4nAI4_e", "focus": -0.0827769297585387, "gap": 11.926065811794956 }, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": "arrow", "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ -145.94742162238094, -61.13637008509096 ] ] }, { "type": "line", "version": 2313, "versionNonce": 303665589, "isDeleted": false, "id": "aFBJSnHUVo8ZDTb0uAkQ2", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1294.8750417574363, "y": -49.56118957084571, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 116.42036295658872, "height": 103.65107323746608, "seed": 1963890941, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -62.44191743896485, 19.19929080548739 ], [ -63.17668831316513, 79.43840749607878 ], [ -7.618334228588694, 103.65107323746608 ], [ 51.963117173367294, 79.15871076413049 ], [ 53.24367464342358, 21.28567723840068 ], [ 0, 0 ] ] }, { "type": "text", "version": 876, "versionNonce": 1665749019, "isDeleted": false, "id": "eqOUg003X-50uDJ_XGUnB", "fillStyle": "solid", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1186.6810892450637, "y": 59.89380735137763, "strokeColor": "#ffb13b", "backgroundColor": "#00e676", "width": 208.96665954589844, "height": 33.6, "seed": 1122464339, "groupIds": [], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Workflow Start", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Workflow Start", "lineHeight": 1.2, "baseline": 24 }, { 
"type": "rectangle", "version": 1789, "versionNonce": 1288147733, "isDeleted": false, "id": "ZTKEryJuyeKfeG4nAI4_e", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1840.5730872026145, "y": 200.79305844892008, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 23.299119994671287, "height": 58.247799986678125, "seed": 1188509340, "groupIds": [ "SFux9iuPQVutxOMe0wlW3" ], "frameId": null, "roundness": null, "boundElements": [ { "id": "leDc6Y5qdU0OH3j90C072", "type": "arrow" } ], "updated": 1691335877136, "link": null, "locked": false }, { "type": "rectangle", "version": 1934, "versionNonce": 2028890229, "isDeleted": false, "id": "CckqOoWddQBHuvidbpk4P", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1888.8925200747174, "y": 145.68194499341337, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 23.29911999467125, "height": 116.4955999733563, "seed": 331107492, "groupIds": [ "SFux9iuPQVutxOMe0wlW3" ], "frameId": null, "roundness": null, "boundElements": [ { "id": "leDc6Y5qdU0OH3j90C072", "type": "arrow" } ], "updated": 1691335877136, "link": null, "locked": false }, { "type": "rectangle", "version": 1815, "versionNonce": 568477019, "isDeleted": false, "id": "J8ox4BBdzEg-04DGRL-jh", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1932.9361110371237, "y": 180.63062498542024, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 23.29911999467127, "height": 81.54691998134939, "seed": 1316437916, "groupIds": [ "SFux9iuPQVutxOMe0wlW3" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false }, { "type": "rectangle", "version": 2003, "versionNonce": 408130005, "isDeleted": false, "id": "V54vdkZmQeI13AEahvNPV", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": 
"solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1974.6102076249463, "y": 155.6767771383196, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 23.29911999467125, "height": 104.84603997602068, "seed": 1025829916, "groupIds": [ "SFux9iuPQVutxOMe0wlW3" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false }, { "type": "arrow", "version": 279, "versionNonce": 1975968251, "isDeleted": false, "id": "Inol-LWi8GThocPMvSZGX", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1347.5263615107983, "y": 20.516421378236373, "strokeColor": "#ffeb3b", "backgroundColor": "transparent", "width": 168.1111111111113, "height": 78.7777777777778, "seed": 1433457683, "groupIds": [], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 168.1111111111113, 78.7777777777778 ] ] }, { "type": "line", "version": 2357, "versionNonce": 1477218101, "isDeleted": false, "id": "8H60JYjdZgpvCjCSLf54L", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1292.9959980885017, "y": 174.32286623612742, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 116.42036295658872, "height": 103.65107323746608, "seed": 1027783997, "groupIds": [], "frameId": null, "roundness": null, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -62.44191743896485, 19.19929080548739 ], [ -63.17668831316513, 79.43840749607878 ], [ -7.618334228588694, 103.65107323746608 ], [ 51.963117173367294, 79.15871076413049 ], [ 53.24367464342358, 
21.28567723840068 ], [ 0, 0 ] ] }, { "type": "text", "version": 823, "versionNonce": 1747905179, "isDeleted": false, "id": "E_wil3E0D2OO7QeMf_xiu", "fillStyle": "solid", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1194.426360493546, "y": 283.38308804490305, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 177.48333740234375, "height": 33.6, "seed": 754233971, "groupIds": [], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Workflow End", "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Workflow End", "lineHeight": 1.2, "baseline": 24 }, { "type": "arrow", "version": 385, "versionNonce": 645849237, "isDeleted": false, "id": "5pptt-wBgcgbxd5LUka3S", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1514.4152503996868, "y": 167.62753248934757, "strokeColor": "#00e676", "backgroundColor": "#34bbde", "width": 171.8888888888889, "height": 57.1111111111112, "seed": 603518365, "groupIds": [], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ -171.8888888888889, 57.1111111111112 ] ] }, { "type": "ellipse", "version": 1775, "versionNonce": 768401211, "isDeleted": false, "id": "GPI87MG84w7ACdrAaKDkG", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 40, "angle": 0, "x": 1553.9985837330207, "y": 157.35401003071968, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 28.340425531915425, "height": 27.553191489362284, "seed": 1372805341, "groupIds": [ "Q6IEbA3c43OUEmWwOqoB-" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": 
[], "updated": 1691335877136, "link": null, "locked": false }, { "type": "rectangle", "version": 1683, "versionNonce": 157287925, "isDeleted": false, "id": "x4O7WOdzCU_nAV1UPLb10", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 40, "angle": 0, "x": 1554.392200754299, "y": 90.61996747752698, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 28.340425531915475, "height": 25.978723404255863, "seed": 1393440061, "groupIds": [ "Q6IEbA3c43OUEmWwOqoB-" ], "frameId": null, "roundness": { "type": 1 }, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false }, { "type": "ellipse", "version": 1831, "versionNonce": 1090731995, "isDeleted": false, "id": "OAHRxshgLCmGZDCtG_FGl", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 40, "angle": 0, "x": 1618.1581582011052, "y": 90.22635045624997, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 28.340425531915425, "height": 27.553191489362284, "seed": 896435613, "groupIds": [ "Q6IEbA3c43OUEmWwOqoB-" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false }, { "type": "line", "version": 1777, "versionNonce": 867532629, "isDeleted": false, "id": "uB-LQPQxLXAYzpMkkFNId", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 40, "angle": 0, "x": 1583.4087576843299, "y": 102.97914403772018, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 35.87707425165013, "height": 0, "seed": 472238589, "groupIds": [ "Q6IEbA3c43OUEmWwOqoB-" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 35.87707425165013, 0 ] ] }, { "type": "line", "version": 1658, "versionNonce": 736398459, 
"isDeleted": false, "id": "AP8DlX5fhd-adRn_4ictA", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 40, "angle": 0, "x": 1568.1322242882986, "y": 117.53634476915352, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 0, "height": 37.78723404255394, "seed": 1211466333, "groupIds": [ "Q6IEbA3c43OUEmWwOqoB-" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1691335877136, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 0, 37.78723404255394 ] ] } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/models.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "line", "version": 1608, "versionNonce": 602661205, "isDeleted": false, "id": "yVN5CxzfpwAZ0J0GEsEim", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1702.2261496777674, "y": -5.340583377694145, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 191.85486538200286, "height": 186.6125755399512, "seed": 1276926237, "groupIds": [ "ZD4Gm6iTkUS0-nxoYPoXN" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688309214322, "link": null, "locked": false, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 0, 0 ], [ -4.946284888409469, 1.310194580443298 ], [ -71.92848036094448, 33.259712239166916 ], [ -71.92848036094448, 33.259712239166916 ], [ -76.36916810848369, 36.77184458512375 ], [ -78.84005658267724, 41.86612631945928 ], [ -95.23551981468633, 113.57223168992945 ], [ -95.23551981468633, 113.57223168992945 ], [ -95.35947746324585, 118.64172405287644 ], [ 
-93.4324997019961, 123.35061560390292 ], [ -92.71129163005317, 124.36030246259705 ], [ -46.433776844719105, 181.9006920694186 ], [ -46.433776844719105, 181.9006920694186 ], [ -41.973552779866964, 185.39027898361584 ], [ -36.44504150185128, 186.6125755399512 ], [ 37.77928857887258, 186.61257553995074 ], [ 37.77928857887258, 186.61257553995074 ], [ 43.320564233786016, 185.34295850958426 ], [ 47.78003745293803, 181.84659678914795 ], [ 94.03350816005033, 124.29419365112881 ], [ 94.03350816005033, 124.29419365112881 ], [ 96.49538791875702, 119.21118606558457 ], [ 96.43753267834124, 113.56021815873193 ], [ 79.9338983420456, 41.794005010746 ], [ 79.9338983420456, 41.794005010746 ], [ 77.45849189744823, 36.69972327641062 ], [ 73.02231208993142, 33.18759093045337 ], [ 6.148302879201523, 1.3162045700933498 ], [ 6.148302879201523, 1.3162045700933498 ], [ -0.060096314222328626, -3.469446951953614e-18 ], [ 0, 0 ] ] }, { "type": "rectangle", "version": 2051, "versionNonce": 216616981, "isDeleted": false, "id": "ZTKEryJuyeKfeG4nAI4_e", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 2003.5691665051115, "y": 81.12265056081542, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 30.25969046240687, "height": 75.64922615601705, "seed": 1188509340, "groupIds": [ "SFux9iuPQVutxOMe0wlW3" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688309215087, "link": null, "locked": false }, { "type": "rectangle", "version": 2192, "versionNonce": 1390810587, "isDeleted": false, "id": "CckqOoWddQBHuvidbpk4P", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 2066.323943610997, "y": 9.547190432552021, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 30.25969046240682, "height": 151.29845231203416, "seed": 331107492, "groupIds": [ "SFux9iuPQVutxOMe0wlW3" ], "frameId": null, "roundness": null, "boundElements": [ { "id": 
"leDc6Y5qdU0OH3j90C072", "type": "arrow" } ], "updated": 1688308769366, "link": null, "locked": false }, { "type": "rectangle", "version": 2072, "versionNonce": 1184041301, "isDeleted": false, "id": "J8ox4BBdzEg-04DGRL-jh", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 2123.5254786767036, "y": 54.936726126162256, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 30.259690462406844, "height": 105.90891661842389, "seed": 1316437916, "groupIds": [ "SFux9iuPQVutxOMe0wlW3" ], "frameId": null, "roundness": null, "boundElements": [ { "id": "5pptt-wBgcgbxd5LUka3S", "type": "arrow" } ], "updated": 1688308769366, "link": null, "locked": false }, { "type": "rectangle", "version": 2259, "versionNonce": 1727832699, "isDeleted": false, "id": "V54vdkZmQeI13AEahvNPV", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 2177.649637157211, "y": 22.527960822066646, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 30.25969046240682, "height": 136.16860708083075, "seed": 1025829916, "groupIds": [ "SFux9iuPQVutxOMe0wlW3" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688308769366, "link": null, "locked": false }, { "type": "arrow", "version": 442, "versionNonce": 1922817787, "isDeleted": false, "id": "Inol-LWi8GThocPMvSZGX", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1374.656480958798, "y": 99.23064378133208, "strokeColor": "#03a9f4", "backgroundColor": "transparent", "width": 216.36132864000353, "height": 0.12505261106101684, "seed": 1653816315, "groupIds": [], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688309230624, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 
216.36132864000353, -0.12505261106101684 ] ] }, { "type": "arrow", "version": 917, "versionNonce": 151223483, "isDeleted": false, "id": "5pptt-wBgcgbxd5LUka3S", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1802.88085185622, "y": 95.10577155105176, "strokeColor": "#00e676", "backgroundColor": "#34bbde", "width": 184.23585514045453, "height": 3.170483019063738, "seed": 1717004827, "groupIds": [], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688309218767, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 184.23585514045453, 3.170483019063738 ] ] }, { "type": "line", "version": 4226, "versionNonce": 789213499, "isDeleted": false, "id": "pikmP5MLN0MZlpnSik5KN", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1235.1617590461694, "y": 25.06522848370164, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 127.43688004487302, "height": 164.47752527518452, "seed": 844586005, "groupIds": [ "xkmNAKuTv_iYXPLj0sxbd" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1688309232141, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 0.4202213858663673, 124.31122212244284 ], [ 0.019665374373081804, 138.46351494441197 ], [ 6.563281167657464, 144.5785456702537 ], [ 29.351096662508862, 149.75384171231534 ], [ 67.86895989535736, 151.36535158937204 ], [ 104.67023109801114, 148.79234001109992 ], [ 124.22301744981843, 142.63902935804376 ], [ 126.98067338289118, 137.45172392705157 ], [ 127.36797617588668, 126.05781617962573 ], [ 127.06398851859365, 10.429017682904831 ], [ 126.37870276277025, -0.4957733094390701 ], [ 118.19596944321839, 
-6.601710860670866 ], [ 100.96487933911183, -10.137946798406993 ], [ 61.697553127560816, -13.11217368581247 ], [ 30.215116414714352, -11.338635495813458 ], [ 5.454393748609269, -5.323010354025768 ], [ -0.06890386898634299, -0.074694110077688 ], [ 0, 0 ] ] }, { "type": "ellipse", "version": 5067, "versionNonce": 1912378357, "isDeleted": false, "id": "sPZv-4m6hv0RYSW1Kzrrb", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1235.9555759014816, "y": 13.472824531333586, "strokeColor": "#03a9f4", "backgroundColor": "transparent", "width": 126.61947902597214, "height": 25.607837035548464, "seed": 194842997, "groupIds": [ "xkmNAKuTv_iYXPLj0sxbd" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1688309232141, "link": null, "locked": false } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/pipeline.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "rectangle", "version": 4788, "versionNonce": 1348021112, "isDeleted": false, "id": "9itz4bqXi6IMeZhGhc42J", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 793.1319708150991, "y": 84.01158837080237, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 2026142776, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" }, { "id": "sbyEF_hTthhrIaA-47du2", "type": "arrow" }, { "id": "3DJ_wB8Ty8UNX4KAZfwjT", "type": "arrow" } ], "updated": 1641741755942 }, { "type": "rectangle", "version": 4836, "versionNonce": 1572562952, 
"isDeleted": false, "id": "7HUYNqBGn8JbkXw6qt_jr", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 785.5845377793848, "y": 76.99149908508815, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 1090159432, "groupIds": [ "6R5o6OnVZ7fFC_87zCo7G", "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" } ], "updated": 1641741755942 }, { "type": "rectangle", "version": 4932, "versionNonce": 2081750136, "isDeleted": false, "id": "mI_Mfw3cNZf0AuQhK5KUO", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 776.1894484936695, "y": 68.73759283508838, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 1146631480, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" } ], "updated": 1641741755942 }, { "type": "line", "version": 4055, "versionNonce": 958473992, "isDeleted": false, "id": "y3oKPiP36axXzEF0c7OiC", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 788.1758750805805, "y": 117.48482988244103, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 46.57983585730082, "height": 3.249953844290203, "seed": 1952111176, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641741755942, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 40.42449133807562, 0.1573930526684746 ], [ 46.57983585730082, 
-3.0925607916217284 ] ] }, { "type": "line", "version": 4081, "versionNonce": 1110355320, "isDeleted": false, "id": "-52Udo1sfovtw-gxgUT91", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 790.0311273185948, "y": 86.01954258360246, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 45.567415680676426, "height": 2.8032978840147194, "seed": 923586104, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641741755943, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 16.832548902953302, -2.8032978840147194 ], [ 45.567415680676426, -0.3275477042019195 ] ] }, { "type": "line", "version": 4106, "versionNonce": 1064764936, "isDeleted": false, "id": "7Tgba-Lgh4JYVwHG-MlYn", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 789.9545278022254, "y": 153.41391685395956, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 48.33668263438425, "height": 4.280657518731036, "seed": 664293704, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641741755943, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 26.41225578429045, -0.2552319773002338 ], [ 37.62000339651456, 2.3153712935189787 ], [ 48.33668263438425, -1.9652862252120569 ] ] }, { "type": "line", "version": 4143, "versionNonce": 1542078072, "isDeleted": false, "id": "KsHbDZNNeZoS7zbhCYNon", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 786.770480620695, "y": 164.65706526223835, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 54.40694982784246, "height": 2.9096445412231735, "seed": 2043037496, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641741755943, 
"lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 10.166093050596771, -1.166642430373031 ], [ 16.130660965377448, -0.8422655250909383 ], [ 46.26079588567538, 0.6125567455206506 ], [ 54.40694982784246, -2.297087795702523 ] ] }, { "type": "line", "version": 4108, "versionNonce": 1572406536, "isDeleted": false, "id": "R5nE2rgdgOpe93Q8VvYhl", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 787.0553235162608, "y": 101.08014413686368, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 571402312, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641741755943, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "line", "version": 4123, "versionNonce": 973844344, "isDeleted": false, "id": "lfWLpL8lTqrf2GMoqJY8y", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 787.1628117124112, "y": 135.5634731727032, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 2028511288, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641741755943, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 8.093938105125233, 1.4279702913643746 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "line", "version": 3006, "versionNonce": 23274504, "isDeleted": false, "id": "8X7QMyL8Lv5BuDjYinx6w", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 1.5707963267948957, "x": 1013.7787705852069, "y": 
125.15514857126914, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 50.7174766392476, "height": 12.698053371678215, "seed": 1906238776, "groupIds": [ "3orQz7tgci8hTh4ECgIAF" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641741755943, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 1.3361877396713384, 5.061656285093649 ], [ 7.098613049589299, 9.308339392337079 ], [ 14.766422451441104, 11.880105003906111 ], [ 26.779003528407447, 12.114458425450186 ], [ 40.79727342221974, 10.477131310135727 ], [ 48.98410145879092, 4.5205722256349645 ], [ 50.7174766392476, -0.5835949462280285 ] ] }, { "type": "line", "version": 5373, "versionNonce": 349634680, "isDeleted": false, "id": "A4PSBtsGIGM5bIdhtvs22", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 1.5707963267948957, "x": 1038.306180989564, "y": 66.05019201370396, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 52.317507746132115, "height": 154.56722543646003, "seed": 1241568072, "groupIds": [ "3orQz7tgci8hTh4ECgIAF" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641741755943, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 0.1725162731731403, 116.82107121890462 ], [ 0.008073356596080744, 130.12064288619618 ], [ 2.694467356765587, 135.86722334556717 ], [ 12.049700419984944, 140.7306911579349 ], [ 27.86269432990675, 142.2451023824676 ], [ 42.97096432627199, 139.82712302629713 ], [ 50.998099415101294, 134.0445691284302 ], [ 52.13021819883883, 129.16981553143583 ], [ 52.28922018370778, 118.46242736729553 ], [ 52.16442211418855, 9.800635828982735 ], [ 51.88308720685254, -0.46590137319512337 ], [ 48.52377541516831, -6.203936550968926 ], [ 41.449781688403746, -9.527102901326515 ], [ 25.329105770103325, -12.322123053992442 
], [ 12.404412180527922, -10.655446243436785 ], [ 2.239228448568725, -5.00228186200372 ], [ -0.028287562424331975, -0.07019354973772352 ], [ 0, 0 ] ] }, { "type": "line", "version": 3140, "versionNonce": 141648648, "isDeleted": false, "id": "w17jKx2sTAqGcj5vlmOMy", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 1.5707963267948957, "x": 1059.615995470046, "y": 125.12063304384947, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 50.57247907260371, "height": 11.002162473203482, "seed": 672606792, "groupIds": [ "3orQz7tgci8hTh4ECgIAF" ], "strokeSharpness": "round", "boundElements": [], "updated": 1641741755944, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 1.332367676378171, 4.385645831062678 ], [ 7.078318632616268, 8.065162379820556 ], [ 14.724206326638113, 10.293455353024548 ], [ 26.70244431044034, 10.496509659421628 ], [ 40.68063699304561, 9.07785607393191 ], [ 48.84405948536458, 3.9168263545984514 ], [ 50.57247907260371, -0.5056528137818528 ] ] }, { "type": "ellipse", "version": 6107, "versionNonce": 2081436024, "isDeleted": false, "id": "sxpRnxFiIOgK8uOIc81pk", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 1.5707963267948957, "x": 1105.8093248657638, "y": 119.64103777538094, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 51.27812853552538, "height": 22.797152568995934, "seed": 1744666168, "groupIds": [ "3orQz7tgci8hTh4ECgIAF" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "bxuMGTzXLn7H-uBCptINx" }, { "id": "ppSKhzeVi1x47qPOxgohP", "type": "arrow" }, { "id": "IEnqZv0JAf73xE5G5LY1s", "type": "arrow" } ], "updated": 1641741755944 }, { "type": "text", "version": 903, "versionNonce": 828359176, "isDeleted": false, "id": "DAMG-SGC-r93uwzYgaFaM", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": 
"solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 961, "y": 165.3106382978724, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 213, "height": 80, "seed": 905916728, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [ { "id": "ppSKhzeVi1x47qPOxgohP", "type": "arrow" } ], "updated": 1641741755944, "fontSize": 16.51063829787236, "fontFamily": 3, "text": "[0.114, 0.344, 0.589],\n[0.894, 0.448, 0.385],\n[0.639, 0.785, 0.546],\n[0.789, 0.248, 0.187]", "baseline": 76, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "[0.114, 0.344, 0.589],\n[0.894, 0.448, 0.385],\n[0.639, 0.785, 0.546],\n[0.789, 0.248, 0.187]" }, { "type": "rectangle", "version": 3994, "versionNonce": 1727742584, "isDeleted": false, "id": "0kwQuYpKaDvZuuKsvL7r5", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1392.3414205264805, "y": 66.34139380028421, "strokeColor": "#00e676", "backgroundColor": "#d0d9dd", "width": 74.26505724978897, "height": 53.22792745407822, "seed": 1723506296, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "XV1prKL4Cu6Tu_cyztWSc" } ], "updated": 1641741755944 }, { "type": "text", "version": 3911, "versionNonce": 187318536, "isDeleted": false, "id": "4eS6gRMMGH5z1mWIMwlWa", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1406.4841911219582, "y": 64.65315479829201, "strokeColor": "#00e676", "backgroundColor": "#d0d9dd", "width": 47.44228316559144, "height": 24.299706011644403, "seed": 1855890696, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755945, "fontSize": 18.51406172315763, "fontFamily": 1, "text": "Text", "baseline": 17.299706011644403, "textAlign": "center", "verticalAlign": "top", "containerId": null, "originalText": "Text" }, { "type": "rectangle", "version": 3610, "versionNonce": 1146893176, 
"isDeleted": false, "id": "PVROaX-PjeUDmh4MQbTl8", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1250.6356656002554, "y": 86.0327898598672, "strokeColor": "#00e676", "backgroundColor": "#ffffff", "width": 224.03197101896893, "height": 53.22792745407822, "seed": 1271926648, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755945 }, { "type": "rectangle", "version": 3670, "versionNonce": 1699536904, "isDeleted": false, "id": "ugOritv0_fPYbcZUm6Uu4", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1250.8971808651277, "y": 111.8916716422263, "strokeColor": "#00e676", "backgroundColor": "#ffffff", "width": 224.03197101896893, "height": 26.01198231582164, "seed": 613729288, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755945 }, { "type": "rectangle", "version": 3765, "versionNonce": 175572088, "isDeleted": false, "id": "eio4Ow124ISNuI0VAUUjS", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1250.7639028142198, "y": 137.72914909405728, "strokeColor": "#00e676", "backgroundColor": "#ffffff", "width": 224.03197101896893, "height": 26.01198231582164, "seed": 789536888, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755945 }, { "type": "rectangle", "version": 3856, "versionNonce": 1069784840, "isDeleted": false, "id": "_6fRtYyVCjDNRCEwdM5Cy", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1250.5409710428607, "y": 163.83262915775464, "strokeColor": "#00e676", "backgroundColor": "#ffffff", "width": 224.03197101896893, "height": 26.01198231582164, "seed": 342819592, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755945 }, { "type": "rectangle", "version": 3800, "versionNonce": 
1192211832, "isDeleted": false, "id": "ZqV86lTip0q28AN63sD4l", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1251.72784114917, "y": 86.97311738448298, "strokeColor": "#00e676", "backgroundColor": "#e6e6e7", "width": 63.93385920866094, "height": 24.3311416053828, "seed": 1883017592, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755945 }, { "type": "rectangle", "version": 3844, "versionNonce": 990543368, "isDeleted": false, "id": "YChnGbrEsJ7DoNKmra0ci", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1251.9893564140477, "y": 112.83199916684202, "strokeColor": "#00e676", "backgroundColor": "#e6e6e7", "width": 63.93385920866094, "height": 24.3311416053828, "seed": 749298184, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755945 }, { "type": "rectangle", "version": 3953, "versionNonce": 795494008, "isDeleted": false, "id": "2izY3o3TkA3Cx67YfzF9f", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1251.8560783631362, "y": 138.66947661867334, "strokeColor": "#00e676", "backgroundColor": "#e6e6e7", "width": 63.93385920866094, "height": 24.3311416053828, "seed": 1453040248, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755945 }, { "type": "rectangle", "version": 4044, "versionNonce": 136150280, "isDeleted": false, "id": "Va8ZaCNlht0FpfumBHJKw", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1251.633146591779, "y": 164.77295668237002, "strokeColor": "#00e676", "backgroundColor": "#e6e6e7", "width": 63.93385920866094, "height": 24.3311416053828, "seed": 742383880, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755945 }, { "type": "text", "version": 3621, 
"versionNonce": 810203000, "isDeleted": false, "id": "sVvye4L-cb_iVCic8Mm8d", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1254.7093435648121, "y": 98.52504819457317, "strokeColor": "#00e676", "backgroundColor": "#ffffff", "width": 32.130700199808395, "height": 12.657548563560873, "seed": 552637304, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755946, "fontSize": 9.73657581812376, "fontFamily": 1, "text": "____", "baseline": 8.657548563560873, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "" }, { "type": "text", "version": 3688, "versionNonce": 1484299272, "isDeleted": false, "id": "Kmc9TdtkzFXb1_8VdBftg", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1254.9708588296896, "y": 124.38392997693245, "strokeColor": "#00e676", "backgroundColor": "#ffffff", "width": 32.130700199808395, "height": 12.657548563560873, "seed": 313425928, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [ { "id": "IEnqZv0JAf73xE5G5LY1s", "type": "arrow" } ], "updated": 1641741755946, "fontSize": 9.73657581812376, "fontFamily": 1, "text": "____", "baseline": 8.657548563560873, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "" }, { "type": "text", "version": 3782, "versionNonce": 2113210488, "isDeleted": false, "id": "4eUPUHqH2EOhDN59TKy-H", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1254.837580778779, "y": 150.22140742876354, "strokeColor": "#00e676", "backgroundColor": "#ffffff", "width": 32.130700199808395, "height": 12.657548563560873, "seed": 360670328, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [ { "id": "IEnqZv0JAf73xE5G5LY1s", "type": "arrow" } ], "updated": 1641741755946, "fontSize": 9.73657581812376, "fontFamily": 1, "text": "____", "baseline": 
8.657548563560873, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "" }, { "type": "text", "version": 3872, "versionNonce": 1310186248, "isDeleted": false, "id": "b-QV7ztzPBTYvkYvn2n8j", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1254.614649007421, "y": 176.32488749246102, "strokeColor": "#00e676", "backgroundColor": "#ffffff", "width": 32.130700199808395, "height": 12.657548563560873, "seed": 267468552, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755946, "fontSize": 9.73657581812376, "fontFamily": 1, "text": "____", "baseline": 8.657548563560873, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "" }, { "type": "text", "version": 3901, "versionNonce": 606943608, "isDeleted": false, "id": "4KBkw2hbsRceHQmpPy7CU", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1320.8795641602012, "y": 98.59324391505857, "strokeColor": "#00e676", "backgroundColor": "#ffffff", "width": 39.91996085430739, "height": 12.657548563560873, "seed": 302781816, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755946, "fontSize": 9.736575818123757, "fontFamily": 1, "text": "_____", "baseline": 8.657548563560873, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "" }, { "type": "text", "version": 3967, "versionNonce": 1802975752, "isDeleted": false, "id": "ypoyWHhBrPKGhPAUDIGbn", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1321.141079425078, "y": 124.4521256974175, "strokeColor": "#00e676", "backgroundColor": "#ffffff", "width": 39.91996085430739, "height": 12.657548563560873, "seed": 1954815496, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755946, "fontSize": 9.736575818123757, 
"fontFamily": 1, "text": "_____", "baseline": 8.657548563560873, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "" }, { "type": "text", "version": 4061, "versionNonce": 372722296, "isDeleted": false, "id": "1jfyF2ZuAzbvS4YBl80za", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1321.0078013741684, "y": 150.28960314924905, "strokeColor": "#00e676", "backgroundColor": "#ffffff", "width": 39.91996085430739, "height": 12.657548563560873, "seed": 467314296, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755946, "fontSize": 9.736575818123757, "fontFamily": 1, "text": "_____", "baseline": 8.657548563560873, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "" }, { "type": "text", "version": 4152, "versionNonce": 59947272, "isDeleted": false, "id": "7coLrbQfexZARdWa0iB51", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1320.7848696028097, "y": 176.39308321294527, "strokeColor": "#00e676", "backgroundColor": "#ffffff", "width": 39.91996085430739, "height": 12.657548563560873, "seed": 560143624, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1641741755946, "fontSize": 9.736575818123757, "fontFamily": 1, "text": "_____", "baseline": 8.657548563560873, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "" }, { "type": "text", "version": 282, "versionNonce": 1054292856, "isDeleted": false, "id": "uG1gxQCUX_W4xoK8WjzqS", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 762, "y": 202, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 117, "height": 26, "seed": 657172488, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1641741755946, "fontSize": 20, "fontFamily": 1, "text": "Input 
Data", "baseline": 18, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Input Data" }, { "type": "arrow", "version": 312, "versionNonce": 702961672, "isDeleted": false, "id": "3DJ_wB8Ty8UNX4KAZfwjT", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 878.680938765565, "y": 132.68136619602433, "strokeColor": "#03a9f4", "backgroundColor": "#ff7043", "width": 104.88363252862348, "height": 0.6395755770326446, "seed": 1663624312, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1641741755946, "startBinding": { "elementId": "9itz4bqXi6IMeZhGhc42J", "focus": -0.09772881248040827, "gap": 14.870387259234633 }, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 104.88363252862348, 0.6395755770326446 ] ] }, { "type": "arrow", "version": 444, "versionNonce": 273747064, "isDeleted": false, "id": "IEnqZv0JAf73xE5G5LY1s", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1148.3291790735977, "y": 134.9159571720989, "strokeColor": "#00e676", "backgroundColor": "#ff7043", "width": 100.9588566647617, "height": 1.6503135268827123, "seed": 2055241224, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1641741755946, "startBinding": { "elementId": "sxpRnxFiIOgK8uOIc81pk", "focus": 0.16194715672100898, "gap": 5.601483341858389 }, "endBinding": { "elementId": "Kmc9TdtkzFXb1_8VdBftg", "focus": -0.3333798833901454, "gap": 5.682823091330192 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 100.9588566647617, -1.6503135268827123 ] ] }, { "type": "text", "version": 247, "versionNonce": 577640200, "isDeleted": false, "id": "g9t3YfCk9S7S38pod_Itm", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "dashed", "roughness": 1, "opacity": 100, "angle": 0, "x": 
1007.5, "y": 71, "strokeColor": "#ff7043", "backgroundColor": "#7950f2", "width": 107, "height": 36, "seed": 1032706680, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1641741755946, "fontSize": 28, "fontFamily": 1, "text": "Process", "baseline": 25, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Process" }, { "type": "line", "version": 67, "versionNonce": 342408568, "isDeleted": false, "id": "agZFvlVOC2laS2bFwE9Rc", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "dashed", "roughness": 1, "opacity": 100, "angle": 0, "x": 1079.5, "y": 90, "strokeColor": "#ff7043", "backgroundColor": "#7950f2", "width": 0, "height": 0, "seed": 1617963896, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1641741755947, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 0, 0 ] ] } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/query.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "rectangle", "version": 3038, "versionNonce": 1386987419, "isDeleted": false, "id": "pjs0X-Y0kuuXB-vpf_dvP", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 934.5, "y": -240.29250000000002, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 312, "height": 146, "seed": 1534167490, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [ { "id": "of5l7HpIH0kPvZ95rOMwM", "type": "text" }, { "id": "Qzp41i_jzQIBlAB_qFKFH", "type": "arrow" }, { "id": "of5l7HpIH0kPvZ95rOMwM", "type": "text" }, { "type": "text", "id": "of5l7HpIH0kPvZ95rOMwM" } ], "updated": 1669033845173, "link": null, "locked": false }, { "type": "text", "version": 
2554, "versionNonce": 1319492501, "isDeleted": false, "id": "of5l7HpIH0kPvZ95rOMwM", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 939.5, "y": -235.29250000000002, "strokeColor": "#000", "backgroundColor": "#fa5252", "width": 260, "height": 130, "seed": 1972792158, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1669033845173, "link": null, "locked": false, "fontSize": 20, "fontFamily": 1, "text": "SELECT\n count(*), flag\nFROM txtai\nGROUP BY flag\nORDER BY count(*) DESC", "baseline": 122, "textAlign": "left", "verticalAlign": "top", "containerId": "pjs0X-Y0kuuXB-vpf_dvP", "originalText": "SELECT\n count(*), flag\nFROM txtai\nGROUP BY flag\nORDER BY count(*) DESC" }, { "type": "rectangle", "version": 3354, "versionNonce": 1004999739, "isDeleted": false, "id": "xMKXZw8aL27lv21eVGy_C", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 581, "y": -238.29250000000002, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 302, "height": 140, "seed": 1704428866, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [ { "id": "tlu8eCXVp5ApnEG74CF38", "type": "text" }, { "id": "Qzp41i_jzQIBlAB_qFKFH", "type": "arrow" }, { "type": "text", "id": "tlu8eCXVp5ApnEG74CF38" } ], "updated": 1669033845173, "link": null, "locked": false }, { "type": "text", "version": 2750, "versionNonce": 686792949, "isDeleted": false, "id": "tlu8eCXVp5ApnEG74CF38", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 586, "y": -233.29250000000002, "strokeColor": "#000", "backgroundColor": "#fa5252", "width": 290, "height": 130, "seed": 92268510, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1669033845173, "link": null, "locked": false, "fontSize": 20, "fontFamily": 1, "text": "SELECT\n text, flag, entry\nFROM txtai\nWHERE\n 
similar('text') AND flag = 1", "baseline": 122, "textAlign": "left", "verticalAlign": "top", "containerId": "xMKXZw8aL27lv21eVGy_C", "originalText": "SELECT\n text, flag, entry\nFROM txtai\nWHERE\n similar('text') AND flag = 1" }, { "type": "rectangle", "version": 3169, "versionNonce": 1477367003, "isDeleted": false, "id": "Tsn_ukSxtC7CIwEMUMHxI", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1301.5, "y": -236.79250000000002, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 304, "height": 140, "seed": 919682718, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [ { "id": "t3OC1akz9Oe2w6N8OCavg", "type": "text" }, { "id": "Qzp41i_jzQIBlAB_qFKFH", "type": "arrow" }, { "id": "t3OC1akz9Oe2w6N8OCavg", "type": "text" }, { "id": "t3OC1akz9Oe2w6N8OCavg", "type": "text" }, { "type": "text", "id": "t3OC1akz9Oe2w6N8OCavg" } ], "updated": 1669033845173, "link": null, "locked": false }, { "type": "text", "version": 2769, "versionNonce": 345619029, "isDeleted": false, "id": "t3OC1akz9Oe2w6N8OCavg", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1306.5, "y": -231.79250000000002, "strokeColor": "#000", "backgroundColor": "#fa5252", "width": 259, "height": 130, "seed": 1222509122, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1669033845173, "link": null, "locked": false, "fontSize": 20, "fontFamily": 1, "text": "SELECT\n translation(text, 'fr')\nFROM txtai\nWHERE\n similar('feel good story')", "baseline": 122, "textAlign": "left", "verticalAlign": "top", "containerId": "Tsn_ukSxtC7CIwEMUMHxI", "originalText": "SELECT\n translation(text, 'fr')\nFROM txtai\nWHERE\n similar('feel good story')" } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/rag.excalidraw 
================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "text", "version": 1836, "versionNonce": 917552614, "isDeleted": false, "id": "YCEwpQiKAjRS3zIG3_E8s", "fillStyle": "hachure", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 2014.213462044321, "y": -342.9945000407677, "strokeColor": "#00e676", "backgroundColor": "transparent", "width": 54.983333587646484, "height": 36, "seed": 676249636, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "LLM", "textAlign": "left", "verticalAlign": "middle", "containerId": null, "originalText": "LLM", "lineHeight": 1.2857142857142858, "baseline": 25 }, { "type": "line", "version": 3253, "versionNonce": 1479474042, "isDeleted": false, "id": "aFBJSnHUVo8ZDTb0uAkQ2", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1746.3803586395986, "y": -463.34177128578517, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 116.42036295658872, "height": 103.65107323746608, "seed": 1963890941, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -62.44191743896485, 19.19929080548739 ], [ -63.17668831316513, 79.43840749607878 ], [ -7.618334228588694, 103.65107323746608 ], [ 51.963117173367294, 79.15871076413049 ], [ 53.24367464342358, 21.28567723840068 ], [ 0, 0 ] ] }, { "type": "rectangle", "version": 2707, "versionNonce": 1122767142, "isDeleted": false, "id": "ZTKEryJuyeKfeG4nAI4_e", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", 
"roughness": 1, "opacity": 100, "angle": 0, "x": 1980.1453046789213, "y": -432.63793463623756, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 23.299119994671287, "height": 58.247799986678125, "seed": 1188509340, "groupIds": [ "SFux9iuPQVutxOMe0wlW3", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [ { "id": "5xuE_JZVT_PTcKy4YrPE8", "type": "arrow" } ], "updated": 1701521662211, "link": null, "locked": false }, { "type": "rectangle", "version": 2850, "versionNonce": 1047886906, "isDeleted": false, "id": "CckqOoWddQBHuvidbpk4P", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 2028.4647375510242, "y": -487.74904809174427, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 23.29911999467125, "height": 116.4955999733563, "seed": 331107492, "groupIds": [ "SFux9iuPQVutxOMe0wlW3", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false }, { "type": "rectangle", "version": 2732, "versionNonce": 718994534, "isDeleted": false, "id": "J8ox4BBdzEg-04DGRL-jh", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 2072.5083285134306, "y": -452.8003680997374, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 23.29911999467127, "height": 81.54691998134939, "seed": 1316437916, "groupIds": [ "SFux9iuPQVutxOMe0wlW3", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false }, { "type": "rectangle", "version": 2936, "versionNonce": 1101915386, "isDeleted": false, "id": "V54vdkZmQeI13AEahvNPV", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 2114.182425101253, "y": -477.75421594683803, "strokeColor": "#00e676", "backgroundColor": "#00e676", 
"width": 23.29911999467125, "height": 104.84603997602068, "seed": 1025829916, "groupIds": [ "SFux9iuPQVutxOMe0wlW3", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [ { "id": "tEBpXPxm5iq2pVW3uI-zN", "type": "arrow" }, { "id": "QwMjSCRfpekAf9are5X7i", "type": "arrow" } ], "updated": 1701521662211, "link": null, "locked": false }, { "type": "text", "version": 1533, "versionNonce": 1682603942, "isDeleted": false, "id": "iRbBQnNoV4xKl3KqmeMQk", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1354.935979235434, "y": -344.1286480222933, "strokeColor": "#03a9f4", "backgroundColor": "transparent", "width": 172.6999969482422, "height": 70, "seed": 437964304, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Extract and\nIndex text", "textAlign": "center", "verticalAlign": "middle", "containerId": null, "originalText": "Extract and\nIndex text", "lineHeight": 1.25, "baseline": 60 }, { "type": "text", "version": 1816, "versionNonce": 995833274, "isDeleted": false, "id": "6MGKGOL-iHR1ALgkWdtgq", "fillStyle": "solid", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1644.3134452350068, "y": -339.59711992736334, "strokeColor": "#fab005", "backgroundColor": "#00e676", "width": 192.14999389648438, "height": 36, "seed": 737085168, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Vector Search", "textAlign": "center", "verticalAlign": "middle", "containerId": null, "originalText": "Vector Search", "lineHeight": 1.2857142857142858, "baseline": 25 }, { "type": "arrow", "version": 1734, "versionNonce": 593992422, "isDeleted": false, "id": 
"TekVO-JfHFlOug8RyHvF6", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1495.4312181086514, "y": -410.21715689579304, "strokeColor": "#03a9f4", "backgroundColor": "#f8f9fa", "width": 183.6089623410503, "height": 1.4014353332532892, "seed": 1668067473, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": { "elementId": "ZX3LSm-VHgqQG71nXy8Jp", "focus": 0.09017987719185833, "gap": 1.2826405001093732 }, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 183.6089623410503, 1.4014353332532892 ] ] }, { "type": "arrow", "version": 1424, "versionNonce": 1554017914, "isDeleted": false, "id": "5xuE_JZVT_PTcKy4YrPE8", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1802.8350046840696, "y": -410.21116509773026, "strokeColor": "#00e676", "backgroundColor": "#f8f9fa", "width": 170.5772610888498, "height": 0.7934282297927382, "seed": 2139598001, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": null, "endBinding": { "elementId": "ZTKEryJuyeKfeG4nAI4_e", "focus": 0.19940295698482952, "gap": 6.73303890600198 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 170.5772610888498, 0.7934282297927382 ] ] }, { "type": "rectangle", "version": 829, "versionNonce": 247418406, "isDeleted": false, "id": "ceYyi4FMiqSJFJi9xVvwk", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 2246.94133763324, "y": -461.9464695959758, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 151.86184901372147, "height": 45, "seed": 
1675993499, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 3 }, "boundElements": [ { "type": "text", "id": "TKBvCE2CYBCmPq45Bjqws" }, { "id": "tEBpXPxm5iq2pVW3uI-zN", "type": "arrow" } ], "updated": 1701521662211, "link": null, "locked": false }, { "type": "text", "version": 805, "versionNonce": 977309498, "isDeleted": false, "id": "TKBvCE2CYBCmPq45Bjqws", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 2277.347260614222, "y": -456.9464695959758, "strokeColor": "#1e1e1e", "backgroundColor": "#ffeb3b", "width": 91.05000305175781, "height": 35, "seed": 1955981979, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Answer", "textAlign": "center", "verticalAlign": "middle", "containerId": "ceYyi4FMiqSJFJi9xVvwk", "originalText": "Answer", "lineHeight": 1.25, "baseline": 25 }, { "type": "rectangle", "version": 925, "versionNonce": 1933482342, "isDeleted": false, "id": "8kLHMUMRHF0HnBTmfmRbB", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 2248.212626691299, "y": -408.2942759824447, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 150.58992984312886, "height": 45, "seed": 1950178933, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 3 }, "boundElements": [ { "type": "text", "id": "Z9_eH0JKgQC5deMjnP4Nl" }, { "id": "QwMjSCRfpekAf9are5X7i", "type": "arrow" } ], "updated": 1701521662211, "link": null, "locked": false }, { "type": "text", "version": 909, "versionNonce": 1486788602, "isDeleted": false, "id": "Z9_eH0JKgQC5deMjnP4Nl", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 2268.8992580252175, "y": -403.2942759824447, "strokeColor": "#1e1e1e", 
"backgroundColor": "#ffeb3b", "width": 109.21666717529297, "height": 35, "seed": 2052310997, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Citation", "textAlign": "center", "verticalAlign": "middle", "containerId": "8kLHMUMRHF0HnBTmfmRbB", "originalText": "Citation", "lineHeight": 1.25, "baseline": 25 }, { "type": "arrow", "version": 2914, "versionNonce": 2016574630, "isDeleted": false, "id": "tEBpXPxm5iq2pVW3uI-zN", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 2145.854477497225, "y": -412.39564497927813, "strokeColor": "#7950f2", "backgroundColor": "#f8f9fa", "width": 98.40776781852423, "height": 26.51909525787792, "seed": 67586843, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": { "elementId": "V54vdkZmQeI13AEahvNPV", "focus": 0.3299220128455593, "gap": 8.372932401300432 }, "endBinding": { "elementId": "ceYyi4FMiqSJFJi9xVvwk", "focus": 0.48070935597063635, "gap": 2.679092317490813 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 98.40776781852423, -26.51909525787792 ] ] }, { "type": "arrow", "version": 3333, "versionNonce": 935168186, "isDeleted": false, "id": "QwMjSCRfpekAf9are5X7i", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 2145.973109719745, "y": -412.4720176121603, "strokeColor": "#ff7043", "backgroundColor": "#f8f9fa", "width": 99.14068321372724, "height": 28.4745045092954, "seed": 1135076219, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": { "elementId": 
"V54vdkZmQeI13AEahvNPV", "focus": 0.12685170125624176, "gap": 8.491564623820295 }, "endBinding": { "elementId": "8kLHMUMRHF0HnBTmfmRbB", "focus": -0.5509825166646869, "gap": 3.098833757826924 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 99.14068321372724, 28.4745045092954 ] ] }, { "type": "rectangle", "version": 5987, "versionNonce": 1397225446, "isDeleted": false, "id": "Wm6KQXptnXme2EhsZEkeR", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1131.175427860874, "y": -462.1767369812002, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 55.277464194934204, "height": 83.88048030659263, "seed": 2083904347, "groupIds": [ "Qj7F6yX0WhR-X_WVj6a3Q", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false }, { "type": "rectangle", "version": 6037, "versionNonce": 1598054778, "isDeleted": false, "id": "9qC6dzMnsEUYrUkMWcOVp", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1125.2726075818707, "y": -467.6671236066981, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 55.277464194934204, "height": 83.88048030659263, "seed": 758626299, "groupIds": [ "LrzsLb2hfnEcKX352d6a4", "Qj7F6yX0WhR-X_WVj6a3Q", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false }, { "type": "rectangle", "version": 6141, "versionNonce": 2089552678, "isDeleted": false, "id": "NSEdoJ3xQn_cWFxLphwMQ", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1117.9247419835488, "y": -474.1224740500794, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 55.277464194934204, "height": 83.88048030659263, "seed": 1729429659, "groupIds": [ "Qj7F6yX0WhR-X_WVj6a3Q", 
"j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false }, { "type": "line", "version": 5257, "versionNonce": 1785340474, "isDeleted": false, "id": "YEmnwZhbbK23DEmyJvwCv", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1127.2992831812242, "y": -435.9974349769433, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 36.42992238421251, "height": 2.541777233017194, "seed": 1400911163, "groupIds": [ "Qj7F6yX0WhR-X_WVj6a3Q", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 31.61584953581463, 0.12309654138955267 ], [ 36.42992238421251, -2.4186806916276415 ] ] }, { "type": "line", "version": 5281, "versionNonce": 1651226214, "isDeleted": false, "id": "MEri2dzaVIq9n6tGJD7WN", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1128.7502692954959, "y": -460.6063232007082, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 35.638112199057964, "height": 2.1924492101548854, "seed": 1104926171, "groupIds": [ "Qj7F6yX0WhR-X_WVj6a3Q", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 13.164676061582496, -2.1924492101548854 ], [ 35.638112199057964, -0.25617388343228015 ] ] }, { "type": "line", "version": 5306, "versionNonce": 337496826, "isDeleted": false, "id": "vgzKUuIqRDM9pNKM15LRR", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, 
"angle": 0, "x": 1128.6903610887653, "y": -407.89742505595746, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 37.803945940804155, "height": 3.34788687616908, "seed": 1899651707, "groupIds": [ "Qj7F6yX0WhR-X_WVj6a3Q", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 20.65693041859134, -0.19961601306414317 ], [ 29.422469585925523, 1.8108435755749763 ], [ 37.803945940804155, -1.5370433005941033 ] ] }, { "type": "line", "version": 5343, "versionNonce": 498334118, "isDeleted": false, "id": "Cp8KP5gHGupNwbMjKHtU1", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1126.200129220062, "y": -399.10419905313154, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 42.55148011818046, "height": 2.27562254893163, "seed": 442391323, "groupIds": [ "Qj7F6yX0WhR-X_WVj6a3Q", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 7.9508648746316855, -0.9124268560932918 ], [ 12.61573202565594, -0.658732843111803 ], [ 36.180402367882216, 0.47907843135401373 ], [ 42.55148011818046, -1.7965441175776165 ] ] }, { "type": "line", "version": 5309, "versionNonce": 1121868730, "isDeleted": false, "id": "Z_ssChLTRG5S_-bNEkKSc", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1126.4229038259869, "y": -448.82748079933765, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 36.70273093540022, "height": 1.9362753267226067, "seed": 968507323, "groupIds": [ 
"Qj7F6yX0WhR-X_WVj6a3Q", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 14.229294797924695, -0.4624437635986851 ], [ 36.70273093540022, 1.4738315631239216 ] ] }, { "type": "line", "version": 5323, "versionNonce": 630314214, "isDeleted": false, "id": "s_UIbn_04wQnRoID1xAww", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1126.5069699582718, "y": -421.8581929690114, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 36.70273093540022, "height": 1.9362753267226052, "seed": 426807387, "groupIds": [ "Qj7F6yX0WhR-X_WVj6a3Q", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 6.330239931623038, 1.1168104379056514 ], [ 14.229294797924746, -0.4624437635986836 ], [ 36.70273093540022, 1.4738315631239216 ] ] }, { "type": "text", "version": 1665, "versionNonce": 349099130, "isDeleted": false, "id": "KQzTCckYJwmcri2ZUXolF", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1094.845592872532, "y": -343.81672500592333, "strokeColor": "#868e96", "backgroundColor": "transparent", "width": 144.43333435058594, "height": 35, "seed": 748785339, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Documents", "textAlign": "center", "verticalAlign": "middle", "containerId": null, "originalText": "Documents", "lineHeight": 1.25, 
"baseline": 25 }, { "type": "ellipse", "version": 407, "versionNonce": 59918374, "isDeleted": false, "id": "ZX3LSm-VHgqQG71nXy8Jp", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1372.5849155794615, "y": -469.38832974419654, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 121.85485206598776, "height": 107.68568322110525, "seed": 54693813, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [ { "id": "TekVO-JfHFlOug8RyHvF6", "type": "arrow" }, { "id": "ocOrefpIzXSYwlrYeB5zZ", "type": "arrow" } ], "updated": 1701521662211, "link": null, "locked": false }, { "type": "rectangle", "version": 6067, "versionNonce": 1806841146, "isDeleted": false, "id": "896_9tzrmBEIv0wPMOAA2", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1159.008440355131, "y": -433.48589612410836, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 41.38251178619768, "height": 62.79566212875594, "seed": 866216059, "groupIds": [ "jC1bSinvdhon1lqLCwioC", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false }, { "type": "rectangle", "version": 6117, "versionNonce": 128078694, "isDeleted": false, "id": "FyfaSNb5FLCEHf0iOIi1n", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1154.5893966575723, "y": -437.5961785429837, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 41.38251178619768, "height": 62.79566212875594, "seed": 1173659931, "groupIds": [ "ZcBCWYu7yQph5fcMtQ5UB", "jC1bSinvdhon1lqLCwioC", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false }, { "type": "rectangle", "version": 6222, "versionNonce": 2028699130, "isDeleted": false, "id": 
"uhf9NUQGgUhU8COKxhwFD", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1149.0885449243297, "y": -442.4288643345245, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 41.38251178619768, "height": 62.79566212875594, "seed": 1721222587, "groupIds": [ "jC1bSinvdhon1lqLCwioC", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": null, "boundElements": [ { "id": "ocOrefpIzXSYwlrYeB5zZ", "type": "arrow" } ], "updated": 1701521662211, "link": null, "locked": false }, { "type": "line", "version": 5337, "versionNonce": 2081355430, "isDeleted": false, "id": "-9TDrRU-TUCFATZg1NUYi", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1156.1066320825057, "y": -413.8872171773836, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 27.272627541642883, "height": 1.902857300622384, "seed": 481121883, "groupIds": [ "jC1bSinvdhon1lqLCwioC", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 23.668655664678663, 0.09215408393064761 ], [ 27.272627541642883, -1.8107032166917363 ] ] }, { "type": "line", "version": 5361, "versionNonce": 1232973498, "isDeleted": false, "id": "XvJ-iIG8KhdoW9fCxaP1l", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1157.1928875909657, "y": -432.31023317127756, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 26.679852623386196, "height": 1.6413389543326597, "seed": 1728566011, "groupIds": [ "jC1bSinvdhon1lqLCwioC", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": null, "endBinding": 
null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 9.855505678747075, -1.6413389543326597 ], [ 26.679852623386196, -0.19178012061240474 ] ] }, { "type": "line", "version": 5386, "versionNonce": 1501901286, "isDeleted": false, "id": "VxXBrorj_TWBXByFNwrzO", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1157.1480383543515, "y": -392.8506336675699, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 28.30126637038239, "height": 2.5063372593097366, "seed": 1177140123, "groupIds": [ "jC1bSinvdhon1lqLCwioC", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 15.464451543932462, -0.14943905502267457 ], [ 22.0266199282381, 1.355656535634992 ], [ 28.30126637038239, -1.1506807236747445 ] ] }, { "type": "line", "version": 5423, "versionNonce": 1376503674, "isDeleted": false, "id": "Y5UL5WFItbSvBNwQhfIs9", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1155.283769593875, "y": -386.26773801560205, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 31.855425229005572, "height": 1.7036052272587878, "seed": 105222203, "groupIds": [ "jC1bSinvdhon1lqLCwioC", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 5.952276649750224, -0.6830724903220912 ], [ 9.444548277434599, -0.49314888157492376 ], [ 27.08582872286446, 0.3586537320544714 ], [ 31.855425229005572, -1.3449514952043164 ] ] }, { "type": "line", 
"version": 5394, "versionNonce": 2064778534, "isDeleted": false, "id": "48QS3eGwek6u2meHCxeow", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1155.450545926502, "y": -423.49220774985906, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 30.39283293375309, "height": 1.5921546638326731, "seed": 1228787931, "groupIds": [ "jC1bSinvdhon1lqLCwioC", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -2.9159720169125194, 1.5921546638326731 ], [ 27.47686091684057, 1.103358356250995 ] ] }, { "type": "line", "version": 5403, "versionNonce": 2061027386, "isDeleted": false, "id": "KCwYMxaGjkAMea5v0qmQT", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1155.513480573754, "y": -403.30211965956437, "strokeColor": "#868e96", "backgroundColor": "#fff", "width": 27.47686091684057, "height": 1.449558833720255, "seed": 1352990075, "groupIds": [ "jC1bSinvdhon1lqLCwioC", "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 4.739023983735066, 0.8360807027362631 ], [ 10.652513972201453, -0.3462004774692599 ], [ 27.47686091684057, 1.103358356250995 ] ] }, { "type": "arrow", "version": 2523, "versionNonce": 1377988710, "isDeleted": false, "id": "ocOrefpIzXSYwlrYeB5zZ", "fillStyle": "hachure", "strokeWidth": 4, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1205.9627879525642, "y": -406.5621788129673, "strokeColor": "#868e96", "backgroundColor": "#f8f9fa", 
"width": 164.6348419304046, "height": 1.709635034590832, "seed": 1351597109, "groupIds": [ "j11_AWYuFwJUdzbrHyyU4" ], "frameId": null, "roundness": { "type": 2 }, "boundElements": [], "updated": 1701521662211, "link": null, "locked": false, "startBinding": { "elementId": "uhf9NUQGgUhU8COKxhwFD", "focus": 0.1532483360492926, "gap": 15.491731242036735 }, "endBinding": { "elementId": "ZX3LSm-VHgqQG71nXy8Jp", "focus": -0.12294829093000738, "gap": 2.5175197483459826 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 164.6348419304046, -1.709635034590832 ] ] } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/schedule.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "rectangle", "version": 5213, "versionNonce": 290163336, "isDeleted": false, "id": "9itz4bqXi6IMeZhGhc42J", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 793.1319708150991, "y": 84.01158837080237, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 2026142776, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" }, { "id": "sbyEF_hTthhrIaA-47du2", "type": "arrow" }, { "id": "3DJ_wB8Ty8UNX4KAZfwjT", "type": "arrow" } ], "updated": 1643333366010 }, { "type": "rectangle", "version": 5260, "versionNonce": 586039800, "isDeleted": false, "id": "7HUYNqBGn8JbkXw6qt_jr", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 785.5845377793848, "y": 76.99149908508815, "strokeColor": 
"#03a9f4", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 1090159432, "groupIds": [ "6R5o6OnVZ7fFC_87zCo7G", "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" } ], "updated": 1643333366010 }, { "type": "rectangle", "version": 5356, "versionNonce": 2072574344, "isDeleted": false, "id": "mI_Mfw3cNZf0AuQhK5KUO", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 776.1894484936695, "y": 68.73759283508838, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 1146631480, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" } ], "updated": 1643333366010 }, { "type": "line", "version": 4486, "versionNonce": 415449848, "isDeleted": false, "id": "y3oKPiP36axXzEF0c7OiC", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 788.1758750805805, "y": 117.48482988244103, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 46.57983585730082, "height": 3.249953844290203, "seed": 1952111176, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366010, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 40.42449133807562, 0.1573930526684746 ], [ 46.57983585730082, -3.0925607916217284 ] ] }, { "type": "line", "version": 4512, "versionNonce": 977307784, "isDeleted": false, "id": "-52Udo1sfovtw-gxgUT91", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, 
"opacity": 100, "angle": 0, "x": 790.0311273185948, "y": 86.01954258360246, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 45.567415680676426, "height": 2.8032978840147194, "seed": 923586104, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366010, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 16.832548902953302, -2.8032978840147194 ], [ 45.567415680676426, -0.3275477042019195 ] ] }, { "type": "line", "version": 4537, "versionNonce": 1850745848, "isDeleted": false, "id": "7Tgba-Lgh4JYVwHG-MlYn", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 789.9545278022254, "y": 153.41391685395956, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 48.33668263438425, "height": 4.280657518731036, "seed": 664293704, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366010, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 26.41225578429045, -0.2552319773002338 ], [ 37.62000339651456, 2.3153712935189787 ], [ 48.33668263438425, -1.9652862252120569 ] ] }, { "type": "line", "version": 4574, "versionNonce": 2033072008, "isDeleted": false, "id": "KsHbDZNNeZoS7zbhCYNon", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 786.770480620695, "y": 164.65706526223835, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 54.40694982784246, "height": 2.9096445412231735, "seed": 2043037496, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366011, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 10.166093050596771, -1.166642430373031 ], [ 16.130660965377448, -0.8422655250909383 ], [ 46.26079588567538, 
0.6125567455206506 ], [ 54.40694982784246, -2.297087795702523 ] ] }, { "type": "line", "version": 4536, "versionNonce": 563668216, "isDeleted": false, "id": "R5nE2rgdgOpe93Q8VvYhl", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 787.0553235162608, "y": 101.08014413686368, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 571402312, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366011, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "line", "version": 4550, "versionNonce": 210634376, "isDeleted": false, "id": "lfWLpL8lTqrf2GMoqJY8y", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 787.1628117124112, "y": 135.5634731727032, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 2028511288, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366011, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 8.093938105125233, 1.4279702913643746 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "text", "version": 600, "versionNonce": 1344374264, "isDeleted": false, "id": "uG1gxQCUX_W4xoK8WjzqS", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 762, "y": 202, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 117, "height": 26, "seed": 657172488, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366011, "fontSize": 20, "fontFamily": 1, "text": "Input 
Data", "baseline": 18, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Input Data" }, { "type": "arrow", "version": 2896, "versionNonce": 1629222280, "isDeleted": false, "id": "3DJ_wB8Ty8UNX4KAZfwjT", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 878.680938765565, "y": 131.68136619602433, "strokeColor": "#03a9f4", "backgroundColor": "#ff7043", "width": 104.88363252862348, "height": 0.6395755770326446, "seed": 1663624312, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366011, "startBinding": { "elementId": "9itz4bqXi6IMeZhGhc42J", "focus": -0.11630205093491493, "gap": 14.870387259234633 }, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 104.88363252862348, 0.6395755770326446 ] ] }, { "type": "arrow", "version": 2644, "versionNonce": 1190004472, "isDeleted": false, "id": "IEnqZv0JAf73xE5G5LY1s", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1132.8223412617328, "y": 135.16943809537315, "strokeColor": "#00e676", "backgroundColor": "#ff7043", "width": 116.4656944766266, "height": 1.9037944501569655, "seed": 2055241224, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366011, "startBinding": null, "endBinding": { "elementId": "lIucERyh1MMe7Vzo8_q7d", "focus": -0.22308504197782658, "gap": 15.365988697358375 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 116.4656944766266, -1.9037944501569655 ] ] }, { "type": "text", "version": 859, "versionNonce": 1813251208, "isDeleted": false, "id": "g9t3YfCk9S7S38pod_Itm", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "dashed", "roughness": 1, "opacity": 100, "angle": 0, "x": 1023.5, "y": 66, "strokeColor": "#ff7043", "backgroundColor": "#7950f2", "width": 
75, "height": 36, "seed": 1032706680, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366011, "fontSize": 28, "fontFamily": 1, "text": "Timer", "baseline": 25, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Timer" }, { "type": "rectangle", "version": 5052, "versionNonce": 310536184, "isDeleted": false, "id": "qfqvZwfY8g2maDsv8tzi-", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1281.5965467571473, "y": 81.99348491888458, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 320002220, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" }, { "id": "sbyEF_hTthhrIaA-47du2", "type": "arrow" }, { "id": "3DJ_wB8Ty8UNX4KAZfwjT", "type": "arrow" } ], "updated": 1643333366011 }, { "type": "rectangle", "version": 5100, "versionNonce": 456274824, "isDeleted": false, "id": "Cydk-vrvGXEEAXU4XHWk7", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1274.049113721433, "y": 74.97339563317036, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 2124765204, "groupIds": [ "m2CJZBWcxZ3sXLBcib5Jt", "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" } ], "updated": 1643333366011 }, { "type": "rectangle", "version": 5197, "versionNonce": 249735416, "isDeleted": false, "id": "lIucERyh1MMe7Vzo8_q7d", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 
0, "x": 1264.6540244357177, "y": 66.71948938317058, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 722133804, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" }, { "id": "IEnqZv0JAf73xE5G5LY1s", "type": "arrow" } ], "updated": 1643333366011 }, { "type": "line", "version": 4318, "versionNonce": 1152549512, "isDeleted": false, "id": "3UaWROnZ6NNaCTSa51ntD", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1276.6404510226287, "y": 115.46672643052324, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 46.57983585730082, "height": 3.249953844290203, "seed": 166278548, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366011, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 40.42449133807562, 0.1573930526684746 ], [ 46.57983585730082, -3.0925607916217284 ] ] }, { "type": "line", "version": 4344, "versionNonce": 2006108664, "isDeleted": false, "id": "H3c0943V86dq4MeskNY-E", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1278.495703260643, "y": 84.00143913168466, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 45.567415680676426, "height": 2.8032978840147194, "seed": 1285397932, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366011, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 16.832548902953302, -2.8032978840147194 ], [ 45.567415680676426, -0.3275477042019195 ] ] }, { "type": "line", "version": 4369, "versionNonce": 391180680, "isDeleted": 
false, "id": "g62fLo1BCU_TuN8OEk_ie", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1278.4191037442736, "y": 151.39581340204182, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 48.33668263438425, "height": 4.280657518731036, "seed": 198533908, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366011, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 26.41225578429045, -0.2552319773002338 ], [ 37.62000339651456, 2.3153712935189787 ], [ 48.33668263438425, -1.9652862252120569 ] ] }, { "type": "line", "version": 4406, "versionNonce": 2006371064, "isDeleted": false, "id": "jMbD-eUPMlCihQh7GK--c", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1275.2350565627432, "y": 162.6389618103206, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 54.40694982784246, "height": 2.9096445412231735, "seed": 1358235692, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366011, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 10.166093050596771, -1.166642430373031 ], [ 16.130660965377448, -0.8422655250909383 ], [ 46.26079588567538, 0.6125567455206506 ], [ 54.40694982784246, -2.297087795702523 ] ] }, { "type": "line", "version": 4371, "versionNonce": 172589192, "isDeleted": false, "id": "lmzq_v_7Zzb2T4WxQPEqG", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1275.519899458309, "y": 99.06204068494588, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 719315092, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366011, "lastCommittedPoint": 
null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "line", "version": 4386, "versionNonce": 963816440, "isDeleted": false, "id": "YoZdONxrJotpnXjxFe748", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1275.6273876544594, "y": 133.54536972078546, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 1543185068, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366011, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 8.093938105125233, 1.4279702913643746 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "text", "version": 596, "versionNonce": 1297624968, "isDeleted": false, "id": "fAIYPmdTgnjEqc8PGu-SI", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1255.9645759420482, "y": 203.9818965480822, "strokeColor": "#00e676", "backgroundColor": "#03a9f4", "width": 134, "height": 26, "seed": 1891202964, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1643333366011, "fontSize": 20, "fontFamily": 1, "text": "Output Data", "baseline": 18, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Output Data" }, { "type": "ellipse", "version": 901, "versionNonce": 1487149704, "isDeleted": false, "id": "j-wM9an_6t3rpXVK34zli", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1022.0933939958717, "y": 100.46179899047428, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 81.77778795030434, "height": 81.77778795030434, "seed": 1020498168, "groupIds": [ "bSaKYXsjdi0TEAthptqF8", 
"-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [ { "id": "3DJ_wB8Ty8UNX4KAZfwjT", "type": "arrow" }, { "id": "IEnqZv0JAf73xE5G5LY1s", "type": "arrow" } ], "updated": 1643333383394 }, { "type": "ellipse", "version": 1081, "versionNonce": 1766820232, "isDeleted": false, "id": "9vh3f3W1oSyVisk-dvz1f", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1025.1015457438984, "y": 102.95003035938365, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 76.42992266951718, "height": 76.42992266951718, "seed": 119976440, "groupIds": [ "bSaKYXsjdi0TEAthptqF8", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [ { "id": "IEnqZv0JAf73xE5G5LY1s", "type": "arrow" }, { "id": "3DJ_wB8Ty8UNX4KAZfwjT", "type": "arrow" } ], "updated": 1643333383394 }, { "type": "line", "version": 892, "versionNonce": 1354045576, "isDeleted": false, "id": "iqDqmrD3b6Ygz-GFOrXwa", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1061.8311628994297, "y": 120.0482909120675, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 0.0883491007133227, "height": 25.95185095385813, "seed": 1013822200, "groupIds": [ "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383394, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 0.0883491007133227, 25.95185095385813 ] ] }, { "type": "line", "version": 970, "versionNonce": 1922090888, "isDeleted": false, "id": "ZwJWJndwUCnxKlrLJvMPl", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 1.9734847452380198, "x": 1070.7802432970152, "y": 142.30181631205494, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 0.8395797827843811, "height": 15.147722701630906, "seed": 321688568, "groupIds": [ 
"-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383394, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 0.8395797827843811, 15.147722701630906 ] ] }, { "type": "line", "version": 845, "versionNonce": 1528731272, "isDeleted": false, "id": "t0x1SEUOcZRg50TGtYP-w", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 6.280886784478146, "x": 1064.6162574402501, "y": 173.36067968426403, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 0.002850147906090324, "height": 5.074594937944382, "seed": 1365726456, "groupIds": [ "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383394, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.002850147906090324, 5.074594937944382 ] ] }, { "type": "line", "version": 1529, "versionNonce": 790267272, "isDeleted": false, "id": "Yw89MpxBojMQgM6zgnIuk", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 3.1241889472809277, "x": 1062.1357257347163, "y": 102.4866803252352, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 0.002850147906090324, "height": 5.074594937944382, "seed": 1554118136, "groupIds": [ "YELVY3p7Zx7E_9iDQmVra", "NVEUOovoAoQURmU2RDepy", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383394, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.002850147906090324, 5.074594937944382 ] ] }, { "type": "line", "version": 1166, "versionNonce": 465605768, "isDeleted": false, "id": "cvYavS_2xzzp8a7_MTILV", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, 
"angle": 5.81125135713188, "x": 1082.8189605518937, "y": 166.68716660884547, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 0.002850147906090324, "height": 5.074594937944382, "seed": 1239447288, "groupIds": [ "p-wnEbrxq-_9Ag8WjVI7N", "SHbRHlotuJGYNAc1emjPb", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383394, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.002850147906090324, 5.074594937944382 ] ] }, { "type": "line", "version": 682, "versionNonce": 1187824520, "isDeleted": false, "id": "yixQP0fZUpIySDIZ4MLuw", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1068.9460036459864, "y": 178.0991106962765, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.13385788646434224, "height": 1.824909818276623, "seed": 1839650808, "groupIds": [ "Mnb3dkXLcGQCoV-PGx3Al", "SHbRHlotuJGYNAc1emjPb", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383394, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.13385788646434224, -1.824909818276623 ] ] }, { "type": "line", "version": 754, "versionNonce": 1048253064, "isDeleted": false, "id": "MAdztZi4TKHJd1ANMjVVe", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 6.108887538323692, "x": 1073.4119741916074, "y": 177.75916345378153, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.10961722295540602, "height": 1.9312918250918398, "seed": 1811486968, "groupIds": [ "Mnb3dkXLcGQCoV-PGx3Al", "SHbRHlotuJGYNAc1emjPb", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383395, 
"startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.10961722295540602, -1.9312918250918398 ] ] }, { "type": "line", "version": 759, "versionNonce": 1770488200, "isDeleted": false, "id": "4KXJlSh8ncnoULQOmGcEa", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 6.108887538323692, "x": 1076.38361498242, "y": 176.04686252433794, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15393862916480724, "height": 1.4493087289890376, "seed": 1213938168, "groupIds": [ "Mnb3dkXLcGQCoV-PGx3Al", "SHbRHlotuJGYNAc1emjPb", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383395, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15393862916480724, -1.4493087289890376 ] ] }, { "type": "line", "version": 809, "versionNonce": 1999507592, "isDeleted": false, "id": "wdrgkWEU57wvnk67HRB3Z", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 6.031023708299764, "x": 1079.4047722054033, "y": 174.21268269656684, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15495851926454807, "height": 1.1604068145567383, "seed": 1976646392, "groupIds": [ "Mnb3dkXLcGQCoV-PGx3Al", "SHbRHlotuJGYNAc1emjPb", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383395, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15495851926454807, -1.1604068145567383 ] ] }, { "type": "line", "version": 1268, "versionNonce": 1328201608, "isDeleted": false, "id": "LFYbBqV93gdPJr7Cums4g", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, 
"opacity": 100, "angle": 5.181114071523705, "x": 1095.014529821029, "y": 152.77748807307205, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 0.002850147906090324, "height": 5.074594937944382, "seed": 1725897720, "groupIds": [ "P0muzq2rC_ILu5MzTfeLX", "995t9r7bEj_eb_vHEY1Ks", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383395, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.002850147906090324, 5.074594937944382 ] ] }, { "type": "line", "version": 775, "versionNonce": 28937864, "isDeleted": false, "id": "Rx0uNkINQcMII9UMJRJ4k", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 5.806701628990837, "x": 1086.0116981168749, "y": 169.7448604067505, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.13385788646434224, "height": 1.824909818276623, "seed": 495508728, "groupIds": [ "HS-2qloHB3pdPonai7TyU", "995t9r7bEj_eb_vHEY1Ks", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383395, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.13385788646434224, -1.824909818276623 ] ] }, { "type": "line", "version": 838, "versionNonce": 932609416, "isDeleted": false, "id": "VLCl-pe-WQcf1BfPXBLt0", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 5.632403860134943, "x": 1088.9868069295608, "y": 166.505670147792, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.10961722295540602, "height": 1.9312918250918398, "seed": 995261944, "groupIds": [ "HS-2qloHB3pdPonai7TyU", "995t9r7bEj_eb_vHEY1Ks", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], 
"updated": 1643333383395, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.10961722295540602, -1.9312918250918398 ] ] }, { "type": "line", "version": 834, "versionNonce": 570264712, "isDeleted": false, "id": "mUOYjWx-N-OWdJoscwtE-", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 5.632403860134943, "x": 1092.1465529032048, "y": 163.23329625583096, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15393862916480724, "height": 1.4493087289890376, "seed": 289406712, "groupIds": [ "HS-2qloHB3pdPonai7TyU", "995t9r7bEj_eb_vHEY1Ks", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383395, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15393862916480724, -1.4493087289890376 ] ] }, { "type": "line", "version": 886, "versionNonce": 1789574024, "isDeleted": false, "id": "qVO9k0FHbZuyI5LKP9GmF", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 5.5545400301110135, "x": 1094.2769300914624, "y": 160.64155267647118, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15495851926454807, "height": 1.1604068145567383, "seed": 802512888, "groupIds": [ "HS-2qloHB3pdPonai7TyU", "995t9r7bEj_eb_vHEY1Ks", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383395, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15495851926454807, -1.1604068145567383 ] ] }, { "type": "line", "version": 1308, "versionNonce": 601492104, "isDeleted": false, "id": "NadcFz4xk5R9NqW8X-MU3", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": 
"solid", "roughness": 1, "opacity": 100, "angle": 4.703634473789069, "x": 1098.3775913645477, "y": 133.59478791519757, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 0.002850147906090324, "height": 5.074594937944382, "seed": 1997971704, "groupIds": [ "zA8Ync2bFFM2m53cjUMBg", "B0S01CmiwscoHuDIbl3Gl", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383395, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.002850147906090324, 5.074594937944382 ] ] }, { "type": "line", "version": 809, "versionNonce": 944208264, "isDeleted": false, "id": "diAnwk9JCDVD9lfXnCqgW", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 5.236849790887245, "x": 1097.7186508669784, "y": 153.30649098297056, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.13385788646434224, "height": 1.824909818276623, "seed": 646431224, "groupIds": [ "DWLrPYo50Rd-VJ5VCi5f7", "B0S01CmiwscoHuDIbl3Gl", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383395, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.13385788646434224, -1.824909818276623 ] ] }, { "type": "line", "version": 883, "versionNonce": 1529809032, "isDeleted": false, "id": "mtlzm_62GJjdDi5RAw4o1", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 5.062552022031351, "x": 1098.749454343585, "y": 149.1780894880784, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.10961722295540602, "height": 1.9312918250918398, "seed": 1831650040, "groupIds": [ "DWLrPYo50Rd-VJ5VCi5f7", "B0S01CmiwscoHuDIbl3Gl", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": 
"sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.10961722295540602, -1.9312918250918398 ] ] }, { "type": "line", "version": 894, "versionNonce": 2018293640, "isDeleted": false, "id": "w7cJevDtNweeN4giVd7kF", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 5.062552022031351, "x": 1099.6846444314046, "y": 145.2886034713148, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15393862916480724, "height": 1.4493087289890376, "seed": 2039063544, "groupIds": [ "DWLrPYo50Rd-VJ5VCi5f7", "B0S01CmiwscoHuDIbl3Gl", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15393862916480724, -1.4493087289890376 ] ] }, { "type": "line", "version": 942, "versionNonce": 601654920, "isDeleted": false, "id": "f56a_kgrxmRNa3Z53Q60z", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 4.984688192007422, "x": 1099.6193083343842, "y": 141.54632901313482, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15495851926454807, "height": 1.1604068145567383, "seed": 161637624, "groupIds": [ "DWLrPYo50Rd-VJ5VCi5f7", "B0S01CmiwscoHuDIbl3Gl", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15495851926454807, -1.1604068145567383 ] ] }, { "type": "line", "version": 1389, "versionNonce": 177894792, "isDeleted": false, "id": "vR3QmWptomkwPkB3sxLie", "fillStyle": "solid", 
"strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 4.175269496869788, "x": 1091.7346739152727, "y": 117.50269095596911, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 0.002850147906090324, "height": 5.074594937944382, "seed": 2063271416, "groupIds": [ "J0ilB_WV5wNeHV0loD0PU", "picpNQI8seSh60PT9pkMm", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.002850147906090324, 5.074594937944382 ] ] }, { "type": "line", "version": 894, "versionNonce": 1640137864, "isDeleted": false, "id": "OotDa77gvcGT8eSVzCVHe", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 4.652271214951025, "x": 1099.0076812735788, "y": 133.53986124540666, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.13385788646434224, "height": 1.824909818276623, "seed": 1519879928, "groupIds": [ "hyo7dIZ-atzeosWf4KGV-", "picpNQI8seSh60PT9pkMm", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.13385788646434224, -1.824909818276623 ] ] }, { "type": "line", "version": 970, "versionNonce": 1460712328, "isDeleted": false, "id": "KYXdIOqSNI3wL2_91JoSI", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 4.477973446095131, "x": 1097.9247650219345, "y": 129.89327965571465, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.10961722295540602, "height": 1.9312918250918398, "seed": 1962665976, "groupIds": [ "hyo7dIZ-atzeosWf4KGV-", "picpNQI8seSh60PT9pkMm", "gOQmYoFEh-8jFI92IdwYc", 
"-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.10961722295540602, -1.9312918250918398 ] ] }, { "type": "line", "version": 984, "versionNonce": 12649096, "isDeleted": false, "id": "1amz5KsYAaVgqHJnfpUZX", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 4.477973446095131, "x": 1096.4518739202772, "y": 125.37009602466333, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15393862916480724, "height": 1.4493087289890376, "seed": 1714170104, "groupIds": [ "hyo7dIZ-atzeosWf4KGV-", "picpNQI8seSh60PT9pkMm", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15393862916480724, -1.4493087289890376 ] ] }, { "type": "line", "version": 1030, "versionNonce": 1301246344, "isDeleted": false, "id": "M8DZXa5ygyRUlbzzJWUwL", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 4.400109616071202, "x": 1094.1448873155746, "y": 122.38879744583448, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15495851926454807, "height": 1.1604068145567383, "seed": 699156984, "groupIds": [ "hyo7dIZ-atzeosWf4KGV-", "picpNQI8seSh60PT9pkMm", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15495851926454807, -1.1604068145567383 ] ] }, { "type": "line", "version": 1445, "versionNonce": 1958215816, "isDeleted": false, "id": 
"JH6nmvMEUuLCuyxh7Y-Yl", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 3.866572733087537, "x": 1078.0076399536329, "y": 106.44508741487394, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 0.002850147906090324, "height": 5.074594937944382, "seed": 713050872, "groupIds": [ "Xj8brNstIhDD7KOQvspnH", "B8ckCsKKOGs9N9f2IAKBw", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.002850147906090324, 5.074594937944382 ] ] }, { "type": "line", "version": 965, "versionNonce": 1710665608, "isDeleted": false, "id": "GDNvbfgT_WZFh6Gi2xvwP", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 4.210232827779823, "x": 1090.4068310713433, "y": 117.91497341317267, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.13385788646434224, "height": 1.824909818276623, "seed": 1395403768, "groupIds": [ "hogKkVF1m8maBYwOG6eeL", "B8ckCsKKOGs9N9f2IAKBw", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.13385788646434224, -1.824909818276623 ] ] }, { "type": "line", "version": 1030, "versionNonce": 965181064, "isDeleted": false, "id": "PvojRAHs1S-Tx4T69Sj14", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 4.035935058923929, "x": 1088.4824681935847, "y": 115.14139125873646, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.10961722295540602, "height": 1.9312918250918398, "seed": 498330872, "groupIds": [ "hogKkVF1m8maBYwOG6eeL", 
"B8ckCsKKOGs9N9f2IAKBw", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.10961722295540602, -1.9312918250918398 ] ] }, { "type": "line", "version": 1042, "versionNonce": 539389320, "isDeleted": false, "id": "kLpgd0Yu3CP-rHZvMfTmB", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 4.035935058923929, "x": 1085.3441396608778, "y": 112.34041881492135, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15393862916480724, "height": 1.4493087289890376, "seed": 597396984, "groupIds": [ "hogKkVF1m8maBYwOG6eeL", "B8ckCsKKOGs9N9f2IAKBw", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15393862916480724, -1.4493087289890376 ] ] }, { "type": "line", "version": 1093, "versionNonce": 1393523848, "isDeleted": false, "id": "XW0QvYMk_SlAi17ooDA2B", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 3.9580712288999997, "x": 1082.318504219098, "y": 110.56683796369228, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15495851926454807, "height": 1.1604068145567383, "seed": 770117368, "groupIds": [ "hogKkVF1m8maBYwOG6eeL", "B8ckCsKKOGs9N9f2IAKBw", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15495851926454807, -1.1604068145567383 ] ] }, { "type": "line", "version": 1023, 
"versionNonce": 67021704, "isDeleted": false, "id": "F30qFOuZ0dhNLow90Cpan", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 3.7190910390046312, "x": 1076.2565286147315, "y": 107.99017766168507, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.13385788646434224, "height": 1.824909818276623, "seed": 1159360504, "groupIds": [ "oCizKDxwJR0_xadyu1U-s", "XOmrDYeKZbtBad03NUVlw", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.13385788646434224, -1.824909818276623 ] ] }, { "type": "line", "version": 1095, "versionNonce": 1294989960, "isDeleted": false, "id": "LHYAA9-Xbe2aHOuAq8GMZ", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 3.5447932701487375, "x": 1073.786092965824, "y": 106.12279110057776, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.10961722295540602, "height": 1.9312918250918398, "seed": 1094299896, "groupIds": [ "oCizKDxwJR0_xadyu1U-s", "XOmrDYeKZbtBad03NUVlw", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.10961722295540602, -1.9312918250918398 ] ] }, { "type": "line", "version": 1106, "versionNonce": 559901064, "isDeleted": false, "id": "bYj77Rzmjzf8NSjoCwzxt", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 3.5447932701487375, "x": 1069.089741413119, "y": 105.47928063454734, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15393862916480724, "height": 1.4493087289890376, "seed": 
1506333176, "groupIds": [ "oCizKDxwJR0_xadyu1U-s", "XOmrDYeKZbtBad03NUVlw", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15393862916480724, -1.4493087289890376 ] ] }, { "type": "line", "version": 1147, "versionNonce": 1003026568, "isDeleted": false, "id": "EHmP4t_Ln-k9nf0WDuFxp", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 3.466929440124808, "x": 1065.1678246220067, "y": 104.30035945599846, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15495851926454807, "height": 1.1604068145567383, "seed": 902550264, "groupIds": [ "oCizKDxwJR0_xadyu1U-s", "XOmrDYeKZbtBad03NUVlw", "gOQmYoFEh-8jFI92IdwYc", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15495851926454807, -1.1604068145567383 ] ] }, { "type": "line", "version": 1476, "versionNonce": 271099784, "isDeleted": false, "id": "mG1ykrgdbCjwTTLG076UZ", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 8.970693917422981, "x": 1043.6956781873973, "y": 109.05152880383471, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 0.002850147906090324, "height": 5.074594937944382, "seed": 2076619768, "groupIds": [ "aWz-oPOUVw7blecn5ozPM", "htHHyoSrCWjL5m6ei2Yqp", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.002850147906090324, 5.074594937944382 ] ] }, { 
"type": "line", "version": 980, "versionNonce": 985298568, "isDeleted": false, "id": "Gg1-a5DZ1iqKankEc7NZZ", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 3.159442560291101, "x": 1058.2415775748786, "y": 105.58456047024029, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.13385788646434224, "height": 1.824909818276623, "seed": 1639806200, "groupIds": [ "aWJ4c9SQuu3MMphEuQ-ho", "htHHyoSrCWjL5m6ei2Yqp", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383396, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.13385788646434224, -1.824909818276623 ] ] }, { "type": "line", "version": 1047, "versionNonce": 1882875272, "isDeleted": false, "id": "WwKJCewx6wFvIxRehBPm7", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 9.268330098614793, "x": 1054.6420144085232, "y": 105.72185538275599, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.10961722295540602, "height": 1.9312918250918398, "seed": 1026823672, "groupIds": [ "aWJ4c9SQuu3MMphEuQ-ho", "htHHyoSrCWjL5m6ei2Yqp", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.10961722295540602, -1.9312918250918398 ] ] }, { "type": "line", "version": 1066, "versionNonce": 1843688584, "isDeleted": false, "id": "LHWQbDIvQxDN7glIRnmaX", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 9.268330098614793, "x": 1049.9323826134132, "y": 107.30966487248091, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15393862916480724, "height": 
1.4493087289890376, "seed": 321820408, "groupIds": [ "aWJ4c9SQuu3MMphEuQ-ho", "htHHyoSrCWjL5m6ei2Yqp", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15393862916480724, -1.4493087289890376 ] ] }, { "type": "line", "version": 1109, "versionNonce": 1769251720, "isDeleted": false, "id": "WrjcrvJkD_rOZMXa3vD3o", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 9.190466268590864, "x": 1046.5890933046576, "y": 108.06753249633519, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15495851926454807, "height": 1.1604068145567383, "seed": 674297848, "groupIds": [ "aWJ4c9SQuu3MMphEuQ-ho", "htHHyoSrCWjL5m6ei2Yqp", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15495851926454807, -1.1604068145567383 ] ] }, { "type": "line", "version": 1562, "versionNonce": 19329672, "isDeleted": false, "id": "0xUXIFFhdKATefNgdnCTU", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 8.340556631814806, "x": 1030.9425520228833, "y": 123.07231991978585, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 0.002850147906090324, "height": 5.074594937944382, "seed": 272578808, "groupIds": [ "qYk4rrM4sZlnENZKO3UnW", "sntyvqr_NecJhARNGx2SL", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.002850147906090324, 
5.074594937944382 ] ] }, { "type": "line", "version": 1055, "versionNonce": 733438344, "isDeleted": false, "id": "ZfGjqQgXwQywcm8hhtTbN", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 8.966144189281938, "x": 1040.3818202212965, "y": 113.2999529930529, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.13385788646434224, "height": 1.824909818276623, "seed": 1054693880, "groupIds": [ "TjeeBqtMlR-uheSJHDiPh", "sntyvqr_NecJhARNGx2SL", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.13385788646434224, -1.824909818276623 ] ] }, { "type": "line", "version": 1127, "versionNonce": 677507208, "isDeleted": false, "id": "aZlViJDSlHn7tGAruxEJU", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 8.791846420426044, "x": 1037.2111805862442, "y": 115.53583148972575, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.10961722295540602, "height": 1.9312918250918398, "seed": 386882296, "groupIds": [ "TjeeBqtMlR-uheSJHDiPh", "sntyvqr_NecJhARNGx2SL", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.10961722295540602, -1.9312918250918398 ] ] }, { "type": "line", "version": 1136, "versionNonce": 1860345736, "isDeleted": false, "id": "5d6J2AYoIsJCqodlKVvFF", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 8.791846420426044, "x": 1034.5956147314077, "y": 118.39218538985745, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 
0.15393862916480724, "height": 1.4493087289890376, "seed": 2069445624, "groupIds": [ "TjeeBqtMlR-uheSJHDiPh", "sntyvqr_NecJhARNGx2SL", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15393862916480724, -1.4493087289890376 ] ] }, { "type": "line", "version": 1187, "versionNonce": 527789704, "isDeleted": false, "id": "Id2K7BQkmsbEgwiaNHvnl", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 8.713982590402114, "x": 1031.5298366450852, "y": 121.68478825218517, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15495851926454807, "height": 1.1604068145567383, "seed": 1572797688, "groupIds": [ "TjeeBqtMlR-uheSJHDiPh", "sntyvqr_NecJhARNGx2SL", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15495851926454807, -1.1604068145567383 ] ] }, { "type": "line", "version": 1640, "versionNonce": 1926651272, "isDeleted": false, "id": "WQpwsmcJYAvTLHlb5qqMO", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 7.850435269712758, "x": 1027.322055413258, "y": 141.29685506859505, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 0.002850147906090324, "height": 5.074594937944382, "seed": 1338285560, "groupIds": [ "mdak74oZmgKI-RKor4lH3", "Lcydq1Ofxyd1uRFHNQht1", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 
0, 0 ], [ -0.002850147906090324, 5.074594937944382 ] ] }, { "type": "line", "version": 1132, "versionNonce": 708930696, "isDeleted": false, "id": "kY4qf7bBSoSDFsDAonEva", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 8.383650586810933, "x": 1027.767632811453, "y": 129.6196997470763, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.13385788646434224, "height": 1.824909818276623, "seed": 309206776, "groupIds": [ "rXzQvZA_jRDvfnnUX32Os", "Lcydq1Ofxyd1uRFHNQht1", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.13385788646434224, -1.824909818276623 ] ] }, { "type": "line", "version": 1206, "versionNonce": 2138217352, "isDeleted": false, "id": "iYo2CtOtKYQEDM57HD40c", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 8.20935281795504, "x": 1025.9097311088917, "y": 132.79754112674334, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.10961722295540602, "height": 1.9312918250918398, "seed": 641695736, "groupIds": [ "rXzQvZA_jRDvfnnUX32Os", "Lcydq1Ofxyd1uRFHNQht1", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.10961722295540602, -1.9312918250918398 ] ] }, { "type": "line", "version": 1221, "versionNonce": 346417800, "isDeleted": false, "id": "BDJwLwzCNzpBXbwJLDZ2G", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 8.20935281795504, "x": 1025.7465834795503, "y": 136.198675323957, "strokeColor": "#ff7043", "backgroundColor": 
"transparent", "width": 0.15393862916480724, "height": 1.4493087289890376, "seed": 1530708216, "groupIds": [ "rXzQvZA_jRDvfnnUX32Os", "Lcydq1Ofxyd1uRFHNQht1", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15393862916480724, -1.4493087289890376 ] ] }, { "type": "line", "version": 1268, "versionNonce": 1687640456, "isDeleted": false, "id": "B1tuUNRwrNKyWw6chx8J7", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 8.13148898793111, "x": 1025.7189251669538, "y": 140.34816595989207, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15495851926454807, "height": 1.1604068145567383, "seed": 1625940472, "groupIds": [ "rXzQvZA_jRDvfnnUX32Os", "Lcydq1Ofxyd1uRFHNQht1", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15495851926454807, -1.1604068145567383 ] ] }, { "type": "line", "version": 1689, "versionNonce": 847440008, "isDeleted": false, "id": "1X5bM69TBInx5Cc39a4MP", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 7.334712057160889, "x": 1034.5138068116148, "y": 158.13530394464817, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 0.002850147906090324, "height": 5.074594937944382, "seed": 1447748344, "groupIds": [ "zN689KS9Kxbl5l-WJ8L2b", "9XDR2v2yWGCJb07_IfL46", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, 
"endArrowhead": null, "points": [ [ 0, 0 ], [ -0.002850147906090324, 5.074594937944382 ] ] }, { "type": "line", "version": 1190, "versionNonce": 2049218440, "isDeleted": false, "id": "smte7vi0Hb6SsmIRNQvAU", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 7.811713775242126, "x": 1027.0233729407373, "y": 149.3365526837065, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.13385788646434224, "height": 1.824909818276623, "seed": 1508099064, "groupIds": [ "Jioetyts4aSni0Wc5m4to", "9XDR2v2yWGCJb07_IfL46", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.13385788646434224, -1.824909818276623 ] ] }, { "type": "line", "version": 1269, "versionNonce": 1605920392, "isDeleted": false, "id": "7A1rcZxIgibmrNb7OsBBA", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 7.637416006386232, "x": 1027.450721482312, "y": 153.30036268261665, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.10961722295540602, "height": 1.9312918250918398, "seed": 431523064, "groupIds": [ "Jioetyts4aSni0Wc5m4to", "9XDR2v2yWGCJb07_IfL46", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.10961722295540602, -1.9312918250918398 ] ] }, { "type": "line", "version": 1281, "versionNonce": 536499592, "isDeleted": false, "id": "U7GuWg-FQFdG1o39ef9rx", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 7.637416006386232, "x": 1029.1754665346089, "y": 155.6877986065798, 
"strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15393862916480724, "height": 1.4493087289890376, "seed": 1666000376, "groupIds": [ "Jioetyts4aSni0Wc5m4to", "9XDR2v2yWGCJb07_IfL46", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383397, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15393862916480724, -1.4493087289890376 ] ] }, { "type": "line", "version": 1327, "versionNonce": 2516104, "isDeleted": false, "id": "h8jL7BgZwCiKbJiccyKI1", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 7.559552176362303, "x": 1031.2590192142006, "y": 158.803031253158, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15495851926454807, "height": 1.1604068145567383, "seed": 866494200, "groupIds": [ "Jioetyts4aSni0Wc5m4to", "9XDR2v2yWGCJb07_IfL46", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383398, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15495851926454807, -1.1604068145567383 ] ] }, { "type": "line", "version": 1734, "versionNonce": 1039700872, "isDeleted": false, "id": "HfpI3EBqLOZIUw8CopY49", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 7.026015293378638, "x": 1047.1706930973965, "y": 169.5461324783206, "strokeColor": "#ff7043", "backgroundColor": "#fff", "width": 0.002850147906090324, "height": 5.074594937944382, "seed": 1195771896, "groupIds": [ "gCe_KP01zh55vsj8TEB6c", "7iwovNHTIOdyQSVLGokMy", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383398, "startBinding": null, "endBinding": null, "lastCommittedPoint": 
null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.002850147906090324, 5.074594937944382 ] ] }, { "type": "line", "version": 1257, "versionNonce": 2142036616, "isDeleted": false, "id": "xsKRboS9ZYcxtioyXFXXx", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 7.369675388070924, "x": 1035.0746865041747, "y": 164.3384099264308, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.13385788646434224, "height": 1.824909818276623, "seed": 707938552, "groupIds": [ "z9Y9AYl6CQNhQWcmpyr7H", "7iwovNHTIOdyQSVLGokMy", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383398, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.13385788646434224, -1.824909818276623 ] ] }, { "type": "line", "version": 1337, "versionNonce": 1140806024, "isDeleted": false, "id": "5s0mIhhlw3ALQwNvHssE3", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 7.19537761921503, "x": 1037.6241316546934, "y": 167.32634419511794, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.10961722295540602, "height": 1.9312918250918398, "seed": 1206807032, "groupIds": [ "z9Y9AYl6CQNhQWcmpyr7H", "7iwovNHTIOdyQSVLGokMy", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383398, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.10961722295540602, -1.9312918250918398 ] ] }, { "type": "line", "version": 1360, "versionNonce": 1378607240, "isDeleted": false, "id": "iQY4LZHj3mjwoiMUfCtHA", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 7.19537761921503, "x": 1040.6724429007581, 
"y": 170.40154204034366, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15393862916480724, "height": 1.4493087289890376, "seed": 2071882488, "groupIds": [ "z9Y9AYl6CQNhQWcmpyr7H", "7iwovNHTIOdyQSVLGokMy", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383398, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15393862916480724, -1.4493087289890376 ] ] }, { "type": "line", "version": 1394, "versionNonce": 703500168, "isDeleted": false, "id": "vsDVSWZjnA9E_gvwthaHk", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 7.117513789191101, "x": 1043.7775848433466, "y": 171.6359211404589, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15495851926454807, "height": 1.1604068145567383, "seed": 526797816, "groupIds": [ "z9Y9AYl6CQNhQWcmpyr7H", "7iwovNHTIOdyQSVLGokMy", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383398, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15495851926454807, -1.1604068145567383 ] ] }, { "type": "line", "version": 1319, "versionNonce": 1937989256, "isDeleted": false, "id": "olJn68aW_Z7IJUSsLvQkM", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 6.878533599295732, "x": 1048.7652459830783, "y": 175.01029570474202, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.13385788646434224, "height": 1.824909818276623, "seed": 497683704, "groupIds": [ "yPIdiHVxLiy_ePyCR-30f", "S_idRPhbaHho_kpbqXhbB", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383398, "startBinding": null, 
"endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.13385788646434224, -1.824909818276623 ] ] }, { "type": "line", "version": 1387, "versionNonce": 1873169800, "isDeleted": false, "id": "Edcd597v0NtE3ZGvSieOB", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 6.704235830439838, "x": 1052.236174741296, "y": 176.6542187430698, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.10961722295540602, "height": 1.9312918250918398, "seed": 2144185848, "groupIds": [ "yPIdiHVxLiy_ePyCR-30f", "S_idRPhbaHho_kpbqXhbB", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383398, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.10961722295540602, -1.9312918250918398 ] ] }, { "type": "line", "version": 1401, "versionNonce": 1737710728, "isDeleted": false, "id": "U24QSlSEfvlCb4V6bZh8e", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 6.704235830439838, "x": 1056.1418818003908, "y": 177.28050978386295, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15393862916480724, "height": 1.4493087289890376, "seed": 2127900408, "groupIds": [ "yPIdiHVxLiy_ePyCR-30f", "S_idRPhbaHho_kpbqXhbB", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383398, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15393862916480724, -1.4493087289890376 ] ] }, { "type": "line", "version": 1445, "versionNonce": 486968200, "isDeleted": false, "id": "dLghPO0y4N7KB8GxBCKSw", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 
6.626372000415909, "x": 1059.7637637793173, "y": 177.55113577418354, "strokeColor": "#ff7043", "backgroundColor": "transparent", "width": 0.15495851926454807, "height": 1.1604068145567383, "seed": 323260408, "groupIds": [ "yPIdiHVxLiy_ePyCR-30f", "S_idRPhbaHho_kpbqXhbB", "cSYzw6LXU8ej7pfT_uMdR", "-IHBW5QNCpMiroffvx8Q_" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1643333383398, "startBinding": null, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ -0.15495851926454807, -1.1604068145567383 ] ] } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/search.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "text", "version": 354, "versionNonce": 1260634512, "isDeleted": false, "id": "Buic2Lx427wuSIW8P_Rw5", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 829, "y": 184, "strokeColor": "#000000", "backgroundColor": "#228be6", "width": 452, "height": 46, "seed": 373648901, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1658676789638, "link": null, "locked": false, "fontSize": 36, "fontFamily": 1, "text": "What is semantic search?", "baseline": 32, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "What is semantic search?" 
}, { "type": "rectangle", "version": 786, "versionNonce": 1756557200, "isDeleted": false, "id": "U2NgEIEiFpAlwmv5Xnyzr", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 50, "angle": 0, "x": 555.5, "y": 425.30499999999995, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 995.3286713286714, "height": 339.0000000000001, "seed": 1946478225, "groupIds": [ "C_65R9XVeMQED2nMfIW2D" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1658676804492, "link": "", "locked": false }, { "type": "text", "version": 569, "versionNonce": 1675727216, "isDeleted": false, "id": "fbSO8bnZmAdvIXZxBAtHY", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 570.7237762237763, "y": 439.50080419580416, "strokeColor": "#000", "backgroundColor": "#03a9f4", "width": 967, "height": 312, "seed": 1586673137, "groupIds": [ "C_65R9XVeMQED2nMfIW2D" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1658676804492, "link": null, "locked": false, "fontSize": 20.27972027972028, "fontFamily": 1, "text": "Query Best Match\n----------------------------------------------------\nfeel good story Maine man wins $1M from $25 lottery ticket\nclimate change Canada's last fully intact ice shelf has suddenly collapsed, forming a \n Manhattan-sized iceberg\npublic health story US tops 5 million confirmed virus cases\nwar Beijing mobilises invasion craft along coast as Taiwan tensions escalate\nwildlife The National Park Service warns against sacrificing slower friends in a\n bear attack\nasia Beijing mobilises invasion craft along coast as Taiwan tensions escalate\nlucky Maine man wins $1M from $25 lottery ticket\ndishonest junk Make huge profits without work, earn up to $100,000 a day", "baseline": 304, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Query Best Match\n----------------------------------------------------\nfeel good story 
Maine man wins $1M from $25 lottery ticket\nclimate change Canada's last fully intact ice shelf has suddenly collapsed, forming a \n Manhattan-sized iceberg\npublic health story US tops 5 million confirmed virus cases\nwar Beijing mobilises invasion craft along coast as Taiwan tensions escalate\nwildlife The National Park Service warns against sacrificing slower friends in a\n bear attack\nasia Beijing mobilises invasion craft along coast as Taiwan tensions escalate\nlucky Maine man wins $1M from $25 lottery ticket\ndishonest junk Make huge profits without work, earn up to $100,000 a day" }, { "type": "rectangle", "version": 952, "versionNonce": 581259632, "isDeleted": false, "id": "UO6MS3wSDu7yg2421__LI", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 933, "y": 267, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 214, "height": 49, "seed": 1629565989, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "text", "id": "8sp7H8ijWBlh6aMgZ0XTP" }, { "id": "Qzp41i_jzQIBlAB_qFKFH", "type": "arrow" }, { "id": "SJ0F0Y81z9hir5qQWAJjk", "type": "arrow" } ], "updated": 1658676831306, "link": null, "locked": false }, { "type": "rectangle", "version": 1611, "versionNonce": 1109835152, "isDeleted": false, "id": "qYd3q0Vjks7VOHUC9RR51", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 549, "y": 267.5, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 219, "height": 52, "seed": 1441952427, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "text", "id": "WPeWn6N4rCHf0jY16N9Ge" }, { "id": "Qzp41i_jzQIBlAB_qFKFH", "type": "arrow" } ], "updated": 1658676831306, "link": null, "locked": false }, { "type": "text", "version": 1297, "versionNonce": 1725539184, "isDeleted": false, "id": "WPeWn6N4rCHf0jY16N9Ge", "fillStyle": "hachure", 
"strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 554, "y": 275.5, "strokeColor": "#000", "backgroundColor": "#fa5252", "width": 209, "height": 36, "seed": 870516459, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1658676831306, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Vectorize", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "qYd3q0Vjks7VOHUC9RR51", "originalText": "Vectorize" }, { "type": "rectangle", "version": 1198, "versionNonce": 1201179536, "isDeleted": false, "id": "5VuUdI_BsJ5pyE1nTqJUI", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1332, "y": 268, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 218, "height": 49, "seed": 1044404613, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "strokeSharpness": "sharp", "boundElements": [ { "id": "bJJ9SGsJsvT071qBBH0w5", "type": "text" }, { "id": "bJJ9SGsJsvT071qBBH0w5", "type": "text" }, { "id": "Q51e0Hav-kYfjX3b8tt-r", "type": "arrow" }, { "type": "text", "id": "bJJ9SGsJsvT071qBBH0w5" }, { "id": "SJ0F0Y81z9hir5qQWAJjk", "type": "arrow" } ], "updated": 1658676831306, "link": null, "locked": false }, { "type": "text", "version": 1381, "versionNonce": 78886256, "isDeleted": false, "id": "bJJ9SGsJsvT071qBBH0w5", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1337, "y": 274.5, "strokeColor": "#000", "backgroundColor": "#fa5252", "width": 208, "height": 36, "seed": 128953675, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1658676831306, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Search", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "5VuUdI_BsJ5pyE1nTqJUI", "originalText": "Search" }, { "type": 
"text", "version": 942, "versionNonce": 429955472, "isDeleted": false, "id": "8sp7H8ijWBlh6aMgZ0XTP", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 938, "y": 273.5, "strokeColor": "#000", "backgroundColor": "transparent", "width": 204, "height": 36, "seed": 1854823263, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1658676831306, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Index", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "UO6MS3wSDu7yg2421__LI", "originalText": "Index" }, { "type": "text", "version": 382, "versionNonce": 1098614640, "isDeleted": false, "id": "jWJpSXHkTCzRTCA4tbAgv", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 548.5, "y": 347.30499999999995, "strokeColor": "#000", "backgroundColor": "#03a9f4", "width": 296, "height": 42, "seed": 1241563487, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1658676831307, "link": null, "locked": false, "fontSize": 16, "fontFamily": 1, "text": "- Transform input into numbers\n- Similar concepts have similar values", "baseline": 36, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "- Transform input into numbers\n- Similar concepts have similar values" }, { "type": "text", "version": 381, "versionNonce": 1266083728, "isDeleted": false, "id": "qEnmXs0P_MQE8r4c4OWGh", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 931.5, "y": 346.2074999999999, "strokeColor": "#000", "backgroundColor": "#f44336", "width": 230, "height": 42, "seed": 1038536465, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1658676831307, "link": null, "locked": false, "fontSize": 16, 
"fontFamily": 1, "text": "- Save vectors\n- Store content with vectors", "baseline": 36, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "- Save vectors\n- Store content with vectors" }, { "type": "text", "version": 491, "versionNonce": 2037979504, "isDeleted": false, "id": "1q8bzjK8lnKUZj8_A9v7D", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1243, "y": 349.2074999999999, "strokeColor": "#000", "backgroundColor": "#f44336", "width": 333, "height": 42, "seed": 304472945, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1658676831307, "link": null, "locked": false, "fontSize": 16, "fontFamily": 1, "text": "- Find similar vectors with cosine similarity\n- Add rule-based filters using content", "baseline": 36, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "- Find similar vectors with cosine similarity\n- Add rule-based filters using content" }, { "type": "arrow", "version": 1813, "versionNonce": 1230206352, "isDeleted": false, "id": "Qzp41i_jzQIBlAB_qFKFH", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 770.5, "y": 289.8470411964629, "strokeColor": "#000", "backgroundColor": "#f44336", "width": 158.1310513485223, "height": 0.5692601572380909, "seed": 660786897, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "strokeSharpness": "round", "boundElements": [], "updated": 1658676831307, "link": null, "locked": false, "startBinding": { "elementId": "qYd3q0Vjks7VOHUC9RR51", "focus": -0.15367587596362536, "gap": 2.5 }, "endBinding": { "elementId": "UO6MS3wSDu7yg2421__LI", "focus": 0.027437144815141, "gap": 4.3689486514776945 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 158.1310513485223, 0.5692601572380909 ] ] }, { "type": "arrow", "version": 1962, "versionNonce": 
1072982896, "isDeleted": false, "id": "SJ0F0Y81z9hir5qQWAJjk", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1149.5, "y": 292.6790761701911, "strokeColor": "#000", "backgroundColor": "#f44336", "width": 181.5, "height": 1.5898915058209013, "seed": 899541905, "groupIds": [ "3sURMvhuRfR0M-Q3VRPbg" ], "strokeSharpness": "round", "boundElements": [], "updated": 1658676831307, "link": null, "locked": false, "startBinding": { "elementId": "UO6MS3wSDu7yg2421__LI", "focus": 0.08406032225724415, "gap": 2.5 }, "endBinding": { "elementId": "5VuUdI_BsJ5pyE1nTqJUI", "focus": 0.09327847520504394, "gap": 1 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 181.5, -1.5898915058209013 ] ] } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/task.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "rectangle", "version": 4906, "versionNonce": 976633260, "isDeleted": false, "id": "9itz4bqXi6IMeZhGhc42J", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 793.1319708150991, "y": 84.01158837080237, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 2026142776, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" }, { "id": "sbyEF_hTthhrIaA-47du2", "type": "arrow" }, { "id": "3DJ_wB8Ty8UNX4KAZfwjT", "type": "arrow" } ], "updated": 1642714818536 }, { "type": "rectangle", "version": 4954, "versionNonce": 1503052564, "isDeleted": false, "id": 
"7HUYNqBGn8JbkXw6qt_jr", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 785.5845377793848, "y": 76.99149908508815, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 1090159432, "groupIds": [ "6R5o6OnVZ7fFC_87zCo7G", "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" } ], "updated": 1642714818536 }, { "type": "rectangle", "version": 5050, "versionNonce": 1115269164, "isDeleted": false, "id": "mI_Mfw3cNZf0AuQhK5KUO", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 776.1894484936695, "y": 68.73759283508838, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 1146631480, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" } ], "updated": 1642714818536 }, { "type": "line", "version": 4173, "versionNonce": 2037727380, "isDeleted": false, "id": "y3oKPiP36axXzEF0c7OiC", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 788.1758750805805, "y": 117.48482988244103, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 46.57983585730082, "height": 3.249953844290203, "seed": 1952111176, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818536, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 40.42449133807562, 0.1573930526684746 ], [ 46.57983585730082, -3.0925607916217284 ] ] }, { 
"type": "line", "version": 4199, "versionNonce": 1944830636, "isDeleted": false, "id": "-52Udo1sfovtw-gxgUT91", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 790.0311273185948, "y": 86.01954258360246, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 45.567415680676426, "height": 2.8032978840147194, "seed": 923586104, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818536, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 16.832548902953302, -2.8032978840147194 ], [ 45.567415680676426, -0.3275477042019195 ] ] }, { "type": "line", "version": 4224, "versionNonce": 1671958036, "isDeleted": false, "id": "7Tgba-Lgh4JYVwHG-MlYn", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 789.9545278022254, "y": 153.41391685395956, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 48.33668263438425, "height": 4.280657518731036, "seed": 664293704, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818536, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 26.41225578429045, -0.2552319773002338 ], [ 37.62000339651456, 2.3153712935189787 ], [ 48.33668263438425, -1.9652862252120569 ] ] }, { "type": "line", "version": 4261, "versionNonce": 355896620, "isDeleted": false, "id": "KsHbDZNNeZoS7zbhCYNon", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 786.770480620695, "y": 164.65706526223835, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 54.40694982784246, "height": 2.9096445412231735, "seed": 2043037496, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818536, "lastCommittedPoint": null, 
"startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 10.166093050596771, -1.166642430373031 ], [ 16.130660965377448, -0.8422655250909383 ], [ 46.26079588567538, 0.6125567455206506 ], [ 54.40694982784246, -2.297087795702523 ] ] }, { "type": "line", "version": 4226, "versionNonce": 1764043668, "isDeleted": false, "id": "R5nE2rgdgOpe93Q8VvYhl", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 787.0553235162608, "y": 101.08014413686368, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 571402312, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818536, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "line", "version": 4241, "versionNonce": 130287532, "isDeleted": false, "id": "lfWLpL8lTqrf2GMoqJY8y", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 787.1628117124112, "y": 135.5634731727032, "strokeColor": "#03a9f4", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 2028511288, "groupIds": [ "zRT55AcPrMo-vI2M6Q7JX" ], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818536, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 8.093938105125233, 1.4279702913643746 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "text", "version": 378, "versionNonce": 1294951852, "isDeleted": false, "id": "uG1gxQCUX_W4xoK8WjzqS", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 762, "y": 202, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 
117, "height": 26, "seed": 657172488, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818537, "fontSize": 20, "fontFamily": 1, "text": "Input Data", "baseline": 18, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Input Data" }, { "type": "arrow", "version": 657, "versionNonce": 1814305556, "isDeleted": false, "id": "3DJ_wB8Ty8UNX4KAZfwjT", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 878.680938765565, "y": 132.68136619602433, "strokeColor": "#03a9f4", "backgroundColor": "#ff7043", "width": 104.88363252862348, "height": 0.6395755770326446, "seed": 1663624312, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818537, "startBinding": { "elementId": "9itz4bqXi6IMeZhGhc42J", "focus": -0.09772881248040827, "gap": 14.870387259234633 }, "endBinding": { "elementId": "FEMcxzUOmwHX8lGW-KR6K", "focus": 0.36409382861238426, "gap": 13.827470007461386 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 104.88363252862348, 0.6395755770326446 ] ] }, { "type": "arrow", "version": 778, "versionNonce": 1812844588, "isDeleted": false, "id": "IEnqZv0JAf73xE5G5LY1s", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1132.8223412617328, "y": 135.16943809537315, "strokeColor": "#00e676", "backgroundColor": "#ff7043", "width": 116.4656944766266, "height": 1.9037944501569655, "seed": 2055241224, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818537, "startBinding": { "elementId": "SitnQfRbjMmvvsNhTxQRZ", "focus": 0.4543845673944436, "gap": 15.775328240798956 }, "endBinding": { "elementId": "lIucERyh1MMe7Vzo8_q7d", "focus": -0.22308504197782658, "gap": 15.365988697358375 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 
116.4656944766266, -1.9037944501569655 ] ] }, { "type": "text", "version": 354, "versionNonce": 9934996, "isDeleted": false, "id": "g9t3YfCk9S7S38pod_Itm", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "dashed", "roughness": 1, "opacity": 100, "angle": 0, "x": 1019.5, "y": 70, "strokeColor": "#ff7043", "backgroundColor": "#7950f2", "width": 84, "height": 36, "seed": 1032706680, "groupIds": [], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818537, "fontSize": 28, "fontFamily": 1, "text": "Action", "baseline": 25, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Action" }, { "type": "diamond", "version": 775, "versionNonce": 79652372, "isDeleted": false, "id": "jfnfQYlAh24YsuMMDHNx7", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1006.140893315531, "y": 143.61217895662338, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 112.64736525303451, "height": 36.77344700318558, "seed": 2110406188, "groupIds": [ "HaVHECspFkRC2Qpa0lGum" ], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818537 }, { "type": "diamond", "version": 816, "versionNonce": 903918892, "isDeleted": false, "id": "6xQj2yQQKSeIivJj9W1ju", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1006.140893315531, "y": 134.74039902836483, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 112.64736525303451, "height": 36.77344700318558, "seed": 1235290772, "groupIds": [ "HaVHECspFkRC2Qpa0lGum" ], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818537 }, { "type": "diamond", "version": 898, "versionNonce": 496075668, "isDeleted": false, "id": "FEMcxzUOmwHX8lGW-KR6K", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1006.140893315531, "y": 122.10983943365278, "strokeColor": "#ff7043", 
"backgroundColor": "#ff7043", "width": 112.64736525303451, "height": 36.77344700318558, "seed": 1689884844, "groupIds": [ "HaVHECspFkRC2Qpa0lGum" ], "strokeSharpness": "round", "boundElements": [ { "id": "3DJ_wB8Ty8UNX4KAZfwjT", "type": "arrow" }, { "id": "IEnqZv0JAf73xE5G5LY1s", "type": "arrow" } ], "updated": 1642714818537 }, { "type": "diamond", "version": 940, "versionNonce": 1625894828, "isDeleted": false, "id": "SitnQfRbjMmvvsNhTxQRZ", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1006.140893315531, "y": 109.57816713635543, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 112.64736525303451, "height": 36.77344700318558, "seed": 264935444, "groupIds": [ "HaVHECspFkRC2Qpa0lGum" ], "strokeSharpness": "round", "boundElements": [ { "id": "IEnqZv0JAf73xE5G5LY1s", "type": "arrow" } ], "updated": 1642714818537 }, { "type": "rectangle", "version": 4955, "versionNonce": 1631842580, "isDeleted": false, "id": "qfqvZwfY8g2maDsv8tzi-", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1281.5965467571473, "y": 81.99348491888458, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 320002220, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" }, { "id": "sbyEF_hTthhrIaA-47du2", "type": "arrow" }, { "id": "3DJ_wB8Ty8UNX4KAZfwjT", "type": "arrow" } ], "updated": 1642714818537 }, { "type": "rectangle", "version": 5003, "versionNonce": 972970540, "isDeleted": false, "id": "Cydk-vrvGXEEAXU4XHWk7", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1274.049113721433, "y": 74.97339563317036, "strokeColor": "#00e676", 
"backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 2124765204, "groupIds": [ "m2CJZBWcxZ3sXLBcib5Jt", "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" } ], "updated": 1642714818537 }, { "type": "rectangle", "version": 5100, "versionNonce": 793554580, "isDeleted": false, "id": "lIucERyh1MMe7Vzo8_q7d", "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1264.6540244357177, "y": 66.71948938317058, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 70.67858069123133, "height": 107.25081879410921, "seed": 722133804, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "arrow", "id": "CFu0B4Mw_1wC1Hbgx8Fs0" }, { "type": "arrow", "id": "XIl_NhaFtRO00pX5Pq6VU" }, { "type": "arrow", "id": "EndiSTFlx1AT7vcBVjgve" }, { "id": "IEnqZv0JAf73xE5G5LY1s", "type": "arrow" } ], "updated": 1642714818537 }, { "type": "line", "version": 4221, "versionNonce": 1007301804, "isDeleted": false, "id": "3UaWROnZ6NNaCTSa51ntD", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1276.6404510226287, "y": 115.46672643052324, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 46.57983585730082, "height": 3.249953844290203, "seed": 166278548, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818537, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 40.42449133807562, 0.1573930526684746 ], [ 46.57983585730082, -3.0925607916217284 ] ] }, { "type": "line", "version": 4247, "versionNonce": 140622868, "isDeleted": false, "id": "H3c0943V86dq4MeskNY-E", "fillStyle": "solid", "strokeWidth": 1, 
"strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1278.495703260643, "y": 84.00143913168466, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 45.567415680676426, "height": 2.8032978840147194, "seed": 1285397932, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818537, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 16.832548902953302, -2.8032978840147194 ], [ 45.567415680676426, -0.3275477042019195 ] ] }, { "type": "line", "version": 4272, "versionNonce": 1958133548, "isDeleted": false, "id": "g62fLo1BCU_TuN8OEk_ie", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1278.4191037442736, "y": 151.39581340204182, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 48.33668263438425, "height": 4.280657518731036, "seed": 198533908, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818537, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 26.41225578429045, -0.2552319773002338 ], [ 37.62000339651456, 2.3153712935189787 ], [ 48.33668263438425, -1.9652862252120569 ] ] }, { "type": "line", "version": 4309, "versionNonce": 1719212436, "isDeleted": false, "id": "jMbD-eUPMlCihQh7GK--c", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1275.2350565627432, "y": 162.6389618103206, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 54.40694982784246, "height": 2.9096445412231735, "seed": 1358235692, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818537, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 10.166093050596771, -1.166642430373031 ], [ 16.130660965377448, 
-0.8422655250909383 ], [ 46.26079588567538, 0.6125567455206506 ], [ 54.40694982784246, -2.297087795702523 ] ] }, { "type": "line", "version": 4274, "versionNonce": 745404844, "isDeleted": false, "id": "lmzq_v_7Zzb2T4WxQPEqG", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1275.519899458309, "y": 99.06204068494588, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 719315092, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818537, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "line", "version": 4289, "versionNonce": 348702484, "isDeleted": false, "id": "YoZdONxrJotpnXjxFe748", "fillStyle": "solid", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1275.6273876544594, "y": 133.54536972078546, "strokeColor": "#00e676", "backgroundColor": "#fff", "width": 46.92865289294453, "height": 2.4757501798128, "seed": 1543185068, "groupIds": [ "33LizGdDAL7lJNoiSiJZy" ], "strokeSharpness": "round", "boundElements": [], "updated": 1642714818537, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": null, "points": [ [ 0, 0 ], [ 8.093938105125233, 1.4279702913643746 ], [ 18.193786115221407, -0.5912874140789839 ], [ 46.92865289294453, 1.884462765733816 ] ] }, { "type": "text", "version": 450, "versionNonce": 240772140, "isDeleted": false, "id": "fAIYPmdTgnjEqc8PGu-SI", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 0, "opacity": 100, "angle": 0, "x": 1255.9645759420482, "y": 203.9818965480822, "strokeColor": "#00e676", "backgroundColor": "#03a9f4", "width": 134, "height": 26, "seed": 1891202964, "groupIds": [], "strokeSharpness": "round", "boundElements": [], 
"updated": 1642714818537, "fontSize": 20, "fontFamily": 1, "text": "Output Data", "baseline": 18, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "Output Data" } ], "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/why.excalidraw ================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "rectangle", "version": 639, "versionNonce": 1876686474, "isDeleted": false, "id": "mG5J3YEl3JUm8tWD2gLax", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 840.0920889678455, "y": 323.8650942251963, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 412.0000000000001, "height": 54.999999999999964, "seed": 322487816, "groupIds": [ "VYua_NGvuS1eX_KPHCgOc" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1731084295790, "index": "a4", "frameId": null, "roundness": null, "link": null, "locked": false }, { "type": "text", "version": 210, "versionNonce": 82579222, "isDeleted": false, "id": "N9ytCRthOA395FeHYtiWS", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1019.5920889678455, "y": 335.8362480713501, "strokeColor": "#343a40", "backgroundColor": "#03a9f4", "width": 53, "height": 36, "seed": 1396884744, "groupIds": [ "VYua_NGvuS1eX_KPHCgOc" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1731084295790, "fontSize": 28, "fontFamily": 1, "text": "API", "baseline": 25, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "API", "index": "a5", "frameId": null, "roundness": null, "link": null, "locked": false, "autoResize": true, "lineHeight": 1.2857142857142858 }, { "type": "rectangle", "version": 460, "versionNonce": 1551291798, "isDeleted": false, "id": 
"QKBksUpSSix31GSKFO1uV", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 837.5920889678456, "y": 456.8747096098117, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 417, "height": 60.846153846153825, "seed": 910262792, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [ { "id": "4o-d8ZzuqEpB7ROBAyReL", "type": "text" } ], "updated": 1731084298697, "index": "a6", "frameId": null, "roundness": null, "link": null, "locked": false }, { "type": "text", "version": 263, "versionNonce": 1011157002, "isDeleted": false, "id": "4o-d8ZzuqEpB7ROBAyReL", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 842.4670889678456, "y": 469.2977865328886, "strokeColor": "#343a40", "backgroundColor": "#03a9f4", "width": 407, "height": 36, "seed": 1820749832, "groupIds": [], "strokeSharpness": "sharp", "boundElements": [], "updated": 1731084298697, "fontSize": 28, "fontFamily": 1, "text": "Embeddings", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "QKBksUpSSix31GSKFO1uV", "originalText": "Embeddings", "index": "a7", "frameId": null, "roundness": null, "link": null, "locked": false, "autoResize": true, "lineHeight": 1.2857142857142858 }, { "type": "rectangle", "version": 847, "versionNonce": 266827407, "isDeleted": false, "id": "uFbzRXgEesWgll3llORrW", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 969.4670889678455, "y": 387.5069211482732, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 135, "height": 60.846153846153825, "seed": 531948552, "groupIds": [ "DlS9eiqyrIHp45FmS35GN" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "text", "id": "Ctud-Ap642NinH0-64fKT" } ], "updated": 1731149308913, "index": "a8", "frameId": null, "roundness": null, "link": null, "locked": false }, { "type": "text", 
"version": 617, "versionNonce": 1566326241, "isDeleted": false, "id": "Ctud-Ap642NinH0-64fKT", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 988.8004223011789, "y": 399.9299980713501, "strokeColor": "#000000", "backgroundColor": "#03a9f4", "width": 96.33333333333333, "height": 36, "seed": 720254072, "groupIds": [ "DlS9eiqyrIHp45FmS35GN" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1731149308913, "fontSize": 28, "fontFamily": 1, "text": "Pipeline", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "uFbzRXgEesWgll3llORrW", "originalText": "Pipeline", "index": "a9", "frameId": null, "roundness": null, "link": null, "locked": false, "autoResize": true, "lineHeight": 1.2857142857142858 }, { "type": "rectangle", "version": 827, "versionNonce": 1640467681, "isDeleted": false, "id": "_zQO_kyS5Pvq1zDhEn9hv", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1111.4863197370764, "y": 388.1766044061918, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 141, "height": 59.76470588235287, "seed": 1988829960, "groupIds": [ "DlS9eiqyrIHp45FmS35GN" ], "strokeSharpness": "sharp", "boundElements": [ { "id": "tL_gw7MpliV1bDFJp8Cql", "type": "text" } ], "updated": 1731149323000, "index": "aA", "frameId": null, "roundness": null, "link": null, "locked": false }, { "type": "text", "version": 617, "versionNonce": 1776634049, "isDeleted": false, "id": "tL_gw7MpliV1bDFJp8Cql", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1122.7113197370763, "y": 400.0589573473683, "strokeColor": "#000000", "backgroundColor": "#03a9f4", "width": 118.55, "height": 36, "seed": 274132856, "groupIds": [ "DlS9eiqyrIHp45FmS35GN" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1731149323001, "fontSize": 28, "fontFamily": 1, 
"text": "Workflow", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "_zQO_kyS5Pvq1zDhEn9hv", "originalText": "Workflow", "index": "aB", "frameId": null, "roundness": null, "link": null, "locked": false, "autoResize": true, "lineHeight": 1.2857142857142858 }, { "type": "rectangle", "version": 910, "versionNonce": 391148577, "isDeleted": false, "id": "STAkXjIBdAZgXMIKZfGnx", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 839.6978581986145, "y": 387.38672884058093, "strokeColor": "#9775fa", "backgroundColor": "#9775fa", "width": 121, "height": 60.846153846153825, "seed": 362222090, "groupIds": [ "DlS9eiqyrIHp45FmS35GN" ], "strokeSharpness": "sharp", "boundElements": [ { "type": "text", "id": "1iyFs-1PG5afe6QKq1s34" } ], "updated": 1731149289560, "index": "aC", "frameId": null, "roundness": null, "link": null, "locked": false }, { "type": "text", "version": 690, "versionNonce": 456727151, "isDeleted": false, "id": "1iyFs-1PG5afe6QKq1s34", "fillStyle": "cross-hatch", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 861.8895238480286, "y": 399.80980576365783, "strokeColor": "#1e1e1e", "backgroundColor": "#03a9f4", "width": 76.61666870117188, "height": 36, "seed": 565985482, "groupIds": [ "DlS9eiqyrIHp45FmS35GN" ], "strokeSharpness": "sharp", "boundElements": [], "updated": 1731149289560, "fontSize": 28, "fontFamily": 1, "text": "Agent", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "STAkXjIBdAZgXMIKZfGnx", "originalText": "Agent", "index": "aD", "frameId": null, "roundness": null, "link": null, "locked": false, "autoResize": true, "lineHeight": 1.2857142857142858 } ], "appState": { "gridSize": 20, "gridStep": 5, "gridModeEnabled": false, "viewBackgroundColor": "#ffffff" }, "files": {} } ================================================ FILE: docs/images/workflow.excalidraw 
================================================ { "type": "excalidraw", "version": 2, "source": "https://excalidraw.com", "elements": [ { "type": "rectangle", "version": 1659, "versionNonce": 1307774681, "isDeleted": false, "id": "qYd3q0Vjks7VOHUC9RR51", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 425, "y": 339.5, "strokeColor": "#00e676", "backgroundColor": "#00e676", "width": 290.0000000000001, "height": 46, "seed": 1441952427, "groupIds": [], "roundness": null, "boundElements": [ { "type": "text", "id": "WPeWn6N4rCHf0jY16N9Ge" } ], "updated": 1673789051585, "link": null, "locked": false }, { "type": "arrow", "version": 311, "versionNonce": 924248664, "isDeleted": false, "id": "tgnQzXC9s8RY4oImBOXuB", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 562.5285999651802, "y": 272.4578674923631, "strokeColor": "#000", "backgroundColor": "#228be6", "width": 0.4714000348197942, "height": 72.5421325076369, "seed": 650463755, "groupIds": [], "roundness": { "type": 2 }, "boundElements": [], "updated": 1658678202111, "link": null, "locked": false, "startBinding": { "elementId": "B435ajoI5vAQBDvzkd8aY", "focus": 0.05963733900881362, "gap": 5.457867492363107 }, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 0.4714000348197942, 72.5421325076369 ] ] }, { "type": "text", "version": 1347, "versionNonce": 162309943, "isDeleted": false, "id": "WPeWn6N4rCHf0jY16N9Ge", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 430, "y": 344.5, "strokeColor": "#000000", "backgroundColor": "#fa5252", "width": 280, "height": 36, "seed": 870516459, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1673789051585, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Translate", "baseline": 
25, "textAlign": "center", "verticalAlign": "middle", "containerId": "qYd3q0Vjks7VOHUC9RR51", "originalText": "Translate" }, { "type": "rectangle", "version": 168, "versionNonce": 2070244952, "isDeleted": false, "id": "IxMxusRKX2PpnJT1uY0cC", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 819, "y": 217.5, "strokeColor": "#ffeb3b", "backgroundColor": "#ffeb3b", "width": 290.0000000000001, "height": 49, "seed": 1364021803, "groupIds": [], "roundness": null, "boundElements": [ { "type": "text", "id": "SSXjX_URKvcVC8h-Qgz4j" }, { "id": "_YNRYTAnVzdhhKxFtkVyg", "type": "arrow" } ], "updated": 1658678121633, "link": null, "locked": false }, { "type": "text", "version": 73, "versionNonce": 1790001960, "isDeleted": false, "id": "SSXjX_URKvcVC8h-Qgz4j", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 824, "y": 224, "strokeColor": "#000000", "backgroundColor": "#82c91e", "width": 280, "height": 36, "seed": 948915563, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1658678121633, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Extract Text", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "IxMxusRKX2PpnJT1uY0cC", "originalText": "Extract Text" }, { "type": "rectangle", "version": 205, "versionNonce": 1180033880, "isDeleted": false, "id": "lATIKISgJPOGnUHleuFRH", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 821, "y": 338, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 290.0000000000001, "height": 46, "seed": 1256311595, "groupIds": [], "roundness": null, "boundElements": [ { "id": "21WiUuyDtpQ9FnJ74REpJ", "type": "text" }, { "id": "Q51e0Hav-kYfjX3b8tt-r", "type": "arrow" } ], "updated": 1658678121633, "link": null, "locked": false }, { "type": "text", "version": 84, "versionNonce": 
1559141928, "isDeleted": false, "id": "21WiUuyDtpQ9FnJ74REpJ", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 826, "y": 343, "strokeColor": "#000000", "backgroundColor": "#fa5252", "width": 280, "height": 36, "seed": 626266373, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1658678121633, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Summarize", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "lATIKISgJPOGnUHleuFRH", "originalText": "Summarize" }, { "type": "arrow", "version": 150, "versionNonce": 108519256, "isDeleted": false, "id": "_YNRYTAnVzdhhKxFtkVyg", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 962.2340210300499, "y": 270, "strokeColor": "#000", "backgroundColor": "#228be6", "width": 0.2659789699500834, "height": 75, "seed": 101249451, "groupIds": [], "roundness": { "type": 2 }, "boundElements": [], "updated": 1658678205029, "link": null, "locked": false, "startBinding": { "elementId": "IxMxusRKX2PpnJT1uY0cC", "focus": 0.012856281024868153, "gap": 3.5 }, "endBinding": null, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 0.2659789699500834, 75 ] ] }, { "type": "rectangle", "version": 213, "versionNonce": 1959303464, "isDeleted": false, "id": "6B3J0wJfi561c9gMsgtXG", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 824, "y": 455, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 290.0000000000001, "height": 46, "seed": 1164190923, "groupIds": [], "roundness": null, "boundElements": [ { "id": "qJDqmDEZF9Ewh2UrjXq5Z", "type": "text" }, { "id": "Q51e0Hav-kYfjX3b8tt-r", "type": "arrow" } ], "updated": 1658678121634, "link": null, "locked": false }, { "type": "text", "version": 135, "versionNonce": 210169176, 
"isDeleted": false, "id": "qJDqmDEZF9Ewh2UrjXq5Z", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 829, "y": 460, "strokeColor": "#000000", "backgroundColor": "#fa5252", "width": 280, "height": 36, "seed": 1307943269, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1658678121634, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Build Vector Index", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "6B3J0wJfi561c9gMsgtXG", "originalText": "Build Vector Index" }, { "type": "arrow", "version": 203, "versionNonce": 133313624, "isDeleted": false, "id": "Q51e0Hav-kYfjX3b8tt-r", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 963.600775377322, "y": 387.5, "strokeColor": "#000", "backgroundColor": "#228be6", "width": 0.29392358842710564, "height": 66.5, "seed": 59173285, "groupIds": [], "roundness": { "type": 2 }, "boundElements": [], "updated": 1658678208037, "link": null, "locked": false, "startBinding": { "elementId": "lATIKISgJPOGnUHleuFRH", "focus": 0.015253485349599789, "gap": 3.5 }, "endBinding": { "elementId": "6B3J0wJfi561c9gMsgtXG", "focus": -0.039966641220930875, "gap": 1 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ -0.29392358842710564, 66.5 ] ] }, { "type": "rectangle", "version": 1246, "versionNonce": 842910649, "isDeleted": false, "id": "5VuUdI_BsJ5pyE1nTqJUI", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1195, "y": 221, "strokeColor": "#7950f2", "backgroundColor": "#7950f2", "width": 290.0000000000001, "height": 46, "seed": 1044404613, "groupIds": [], "roundness": null, "boundElements": [ { "id": "bJJ9SGsJsvT071qBBH0w5", "type": "text" }, { "id": "Q51e0Hav-kYfjX3b8tt-r", "type": "arrow" } ], "updated": 1673789051585, "link": 
null, "locked": false }, { "type": "text", "version": 1431, "versionNonce": 1914742871, "isDeleted": false, "id": "bJJ9SGsJsvT071qBBH0w5", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1200, "y": 226, "strokeColor": "#000000", "backgroundColor": "#fa5252", "width": 280, "height": 36, "seed": 128953675, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1673789051585, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Run similarity query", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "5VuUdI_BsJ5pyE1nTqJUI", "originalText": "Run similarity query" }, { "type": "rectangle", "version": 334, "versionNonce": 7828646, "isDeleted": false, "id": "pSbgtf1qAB7tWl-pTld7e", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1197, "y": 343, "strokeColor": "#ff7043", "backgroundColor": "#ff7043", "width": 290.0000000000001, "height": 46, "seed": 210689733, "groupIds": [], "roundness": null, "boundElements": [ { "id": "pQzKSM3audka1kiQm_Sku", "type": "text" }, { "id": "Q51e0Hav-kYfjX3b8tt-r", "type": "arrow" }, { "id": "k67YzXNtt1GLh4i8Es5zJ", "type": "arrow" } ], "updated": 1673791639562, "link": null, "locked": false }, { "type": "text", "version": 293, "versionNonce": 271822438, "isDeleted": false, "id": "pQzKSM3audka1kiQm_Sku", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1219.5, "y": 348, "strokeColor": "#000", "backgroundColor": "#fa5252", "width": 245, "height": 36, "seed": 1869028363, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1673791644235, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Send notifications", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "pSbgtf1qAB7tWl-pTld7e", "originalText": "Send notifications" }, 
{ "type": "arrow", "version": 310, "versionNonce": 1574300026, "isDeleted": false, "id": "k67YzXNtt1GLh4i8Es5zJ", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1339.1171359117898, "y": 265.5, "strokeColor": "#000", "backgroundColor": "#228be6", "width": 1.0119492860442278, "height": 76.5, "seed": 1656816747, "groupIds": [], "roundness": { "type": 2 }, "boundElements": [], "updated": 1673791641098, "link": null, "locked": false, "startBinding": null, "endBinding": { "elementId": "pSbgtf1qAB7tWl-pTld7e", "focus": -0.010690950588675632, "gap": 1 }, "lastCommittedPoint": null, "startArrowhead": null, "endArrowhead": "arrow", "points": [ [ 0, 0 ], [ 1.0119492860442278, 76.5 ] ] }, { "type": "text", "version": 308, "versionNonce": 641647912, "isDeleted": false, "id": "kKPPLMCj8QQIJLbW-B5hm", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 1180, "y": 451, "strokeColor": "#000", "backgroundColor": "#fab005", "width": 296, "height": 52, "seed": 1079990731, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1658678121635, "link": null, "locked": false, "fontSize": 20, "fontFamily": 1, "text": "- API bindings for JavaScript,\n Rust, Go and Java", "baseline": 44, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "- API bindings for JavaScript,\n Rust, Go and Java" }, { "type": "text", "version": 560, "versionNonce": 1977188696, "isDeleted": false, "id": "1AQ3rj-V4weRtPA8a5-z-", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 437.5, "y": 449.5, "strokeColor": "#000", "backgroundColor": "#fab005", "width": 278, "height": 52, "seed": 836512075, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1658678121635, "link": null, "locked": false, "fontSize": 20, "fontFamily": 1, "text": "- Build with Python or YAML\n- Run 
local or via API", "baseline": 44, "textAlign": "left", "verticalAlign": "top", "containerId": null, "originalText": "- Build with Python or YAML\n- Run local or via API" }, { "type": "rectangle", "version": 292, "versionNonce": 259776552, "isDeleted": false, "id": "B435ajoI5vAQBDvzkd8aY", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 426, "y": 221, "strokeColor": "#03a9f4", "backgroundColor": "#03a9f4", "width": 290.0000000000001, "height": 46, "seed": 942981672, "groupIds": [], "roundness": null, "boundElements": [ { "id": "AoGnxEHn4x-zq2-0C0VrT", "type": "text" }, { "id": "Q51e0Hav-kYfjX3b8tt-r", "type": "arrow" }, { "id": "tgnQzXC9s8RY4oImBOXuB", "type": "arrow" } ], "updated": 1658678121635, "link": null, "locked": false }, { "type": "text", "version": 171, "versionNonce": 2093287976, "isDeleted": false, "id": "AoGnxEHn4x-zq2-0C0VrT", "fillStyle": "hachure", "strokeWidth": 1, "strokeStyle": "solid", "roughness": 1, "opacity": 100, "angle": 0, "x": 431, "y": 226, "strokeColor": "#000000", "backgroundColor": "#fa5252", "width": 280, "height": 36, "seed": 604886104, "groupIds": [], "roundness": null, "boundElements": [], "updated": 1658678188284, "link": null, "locked": false, "fontSize": 28, "fontFamily": 1, "text": "Summarize", "baseline": 25, "textAlign": "center", "verticalAlign": "middle", "containerId": "B435ajoI5vAQBDvzkd8aY", "originalText": "Summarize" } ], "appState": { "gridSize": null, "viewBackgroundColor": "#3030" }, "files": {} } ================================================ FILE: docs/index.md ================================================ #

All-in-one AI framework

Version GitHub last commit GitHub issues Join Slack Build Status Coverage Status

txtai is an all-in-one AI framework for semantic search, LLM orchestration and language model workflows. ![architecture](images/architecture.png#gh-light-mode-only) ![architecture](images/architecture-dark.png#gh-dark-mode-only) The key component of txtai is an embeddings database, which is a union of vector indexes (sparse and dense), graph networks and relational databases. This foundation enables vector search and/or serves as a powerful knowledge source for large language model (LLM) applications. Build autonomous agents, retrieval augmented generation (RAG) processes, multi-model workflows and more. Summary of txtai features: - 🔎 Vector search with SQL, object storage, topic modeling, graph analysis and multimodal indexing - 📄 Create embeddings for text, documents, audio, images and video - 💡 Pipelines powered by language models that run LLM prompts, question-answering, labeling, transcription, translation, summarization and more - ↪️️ Workflows to join pipelines together and aggregate business logic. txtai processes can be simple microservices or multi-model workflows. - 🤖 Agents that intelligently connect embeddings, pipelines, workflows and other agents together to autonomously solve complex problems - ⚙️ Web and Model Context Protocol (MCP) APIs. Bindings available for [JavaScript](https://github.com/neuml/txtai.js), [Java](https://github.com/neuml/txtai.java), [Rust](https://github.com/neuml/txtai.rs) and [Go](https://github.com/neuml/txtai.go). - 🔋 Batteries included with defaults to get up and running fast - ☁️ Run local or scale out with container orchestration txtai is built with Python 3.10+, [Hugging Face Transformers](https://github.com/huggingface/transformers), [Sentence Transformers](https://github.com/UKPLab/sentence-transformers) and [FastAPI](https://github.com/tiangolo/fastapi). txtai is open-source under an Apache 2.0 license. !!! 
note [NeuML](https://neuml.com) is the company behind txtai and we provide AI consulting services around our stack. [Schedule a meeting](https://cal.com/neuml/intro) or [send a message](mailto:info@neuml.com) to learn more. We're also building an easy and secure way to run hosted txtai applications with [txtai.cloud](https://txtai.cloud). ================================================ FILE: docs/install.md ================================================ # Installation ![install](images/install.png#only-light) ![install](images/install-dark.png#only-dark) The easiest way to install is via pip and PyPI ``` pip install txtai ``` Python 3.10+ is supported. Using a Python [virtual environment](https://docs.python.org/3/library/venv.html) is recommended. ## Optional dependencies txtai has the following optional dependencies that can be installed as extras. The patterns below are supported in setup.py install_requires sections. _Note: Extras are provided for convenience. Alternatively, individual packages can be installed to limit dependencies._ ### All Install all dependencies. ``` pip install txtai[all] ``` ### ANN Additional ANN backends. ``` pip install txtai[ann] ``` ### API Serve txtai via a web API. ``` pip install txtai[api] ``` ### Cloud Interface with cloud compute. ``` pip install txtai[cloud] ``` ### Console Command line index query console. ``` pip install txtai[console] ``` ### Database Additional content storage options. ``` pip install txtai[database] ``` ### Graph Topic modeling, data connectivity and network analysis. ``` pip install txtai[graph] ``` ### Model Additional non-standard models. ``` pip install txtai[model] ``` ### Pipeline All pipelines - default install comes with most common pipelines. ``` pip install txtai[pipeline] ``` More granular extras are available for pipeline categories: `pipeline-audio`, `pipeline-data`, `pipeline-image`, `pipeline-llm`, `pipeline-text`, and `pipeline-train`. ### Scoring Additional scoring methods. 
``` pip install txtai[scoring] ``` ### Vectors Additional vector methods. ``` pip install txtai[vectors] ``` ### Workflow All workflow tasks - default install comes with most common workflow tasks. ``` pip install txtai[workflow] ``` ### Combining dependencies Multiple dependencies can be specified at the same time. ``` pip install txtai[pipeline,workflow] ``` ## Environment specific prerequisites Additional environment specific prerequisites are below. ### Linux The AudioStream and Microphone pipelines require the [PortAudio](https://python-sounddevice.readthedocs.io/en/0.5.0/installation.html) system library. The Transcription pipeline requires the [SoundFile](https://github.com/bastibe/python-soundfile#installation) system library. ### macOS Older versions of Faiss have a runtime dependency on `libomp` for macOS. Run `brew install libomp` in this case. The AudioStream and Microphone pipelines require the [PortAudio](https://python-sounddevice.readthedocs.io/en/0.5.0/installation.html) system library. Run `brew install portaudio`. ### Windows Optional dependencies require [C++ Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/). The [txtai build workflow](https://github.com/neuml/txtai/blob/master/.github/workflows/build.yml) occasionally has workarounds for other known but temporary dependency issues. The [FAQ](../faq) also has a list of common problems, including common installation issues. ## CPU-only The default install adds PyTorch with GPU support. There are a number of dependencies that come with that. When running in a CPU-only environment or using Embeddings/LLM models without PyTorch (e.g. llama.cpp or API services), the CPU-only PyTorch package can be installed with txtai as follows. ``` pip install txtai torch==[version]+cpu \ -f https://download.pytorch.org/whl/torch ``` Where `[version]` is the version of PyTorch (such as 2.4.1). 
The [txtai-cpu](https://hub.docker.com/r/neuml/txtai-cpu) image on Docker Hub uses this method to reduce the image size. ## Install from source txtai can also be installed directly from GitHub to access the latest, unreleased features. ``` pip install git+https://github.com/neuml/txtai ``` Extras can be installed from GitHub by adding `#egg=txtai[<extras>]` to the end of the above URL. ## Conda A [community-supported txtai package](https://anaconda.org/conda-forge/txtai) is available via conda-forge. ``` conda install -c conda-forge txtai ``` ## Run with containers Docker images are available for txtai. [See this section](../cloud) for more information on container-based installs. ================================================ FILE: docs/models.md ================================================ # Model guide ![models](images/models.png) See the table below for the current recommended models. These models all allow commercial use and offer a blend of speed and performance. | Component | Model(s) | | ---------------------------------------------------- | ------------------------------------------------------------------------ | | [Embeddings](../embeddings) | [all-MiniLM-L6-v2](https://hf.co/sentence-transformers/all-MiniLM-L6-v2) | | [Image Captions](./pipeline/image/caption.md) | [BLIP](https://hf.co/Salesforce/blip-image-captioning-base) | | [Labels - Zero Shot](./pipeline/text/labels.md) | [BART-Large-MNLI](https://hf.co/facebook/bart-large-mnli) | | [Labels - Fixed](./pipeline/text/labels.md) | Fine-tune with [training pipeline](./pipeline/train/trainer.md) | | [Large Language Model (LLM)](./pipeline/llm/llm.md) | [gpt-oss-20b](https://huggingface.co/openai/gpt-oss-20b) | | [Summarization](./pipeline/text/summary.md) | [DistilBART](https://hf.co/sshleifer/distilbart-cnn-12-6) | | [Text-to-Speech](./pipeline/audio/texttospeech.md) | [ESPnet JETS](https://hf.co/NeuML/ljspeech-jets-onnx) | | [Transcription](./pipeline/audio/transcription.md) | 
[Whisper](https://hf.co/openai/whisper-base) | | [Translation](./pipeline/text/translation.md) | [OPUS Model Series](https://hf.co/Helsinki-NLP) | Models can be loaded as either a path from the Hugging Face Hub or a local directory. Model paths are optional; defaults are loaded when not specified. For tasks with no recommended model, txtai uses the default models as shown in the Hugging Face Tasks guide. See the following links to learn more. - [Hugging Face Tasks](https://hf.co/tasks) - [Hugging Face Model Hub](https://hf.co/models) - [MTEB Leaderboard](https://hf.co/spaces/mteb/leaderboard) - [LMSYS LLM Leaderboard](https://chat.lmsys.org/?leaderboard) - [Open LLM Leaderboard](https://hf.co/spaces/HuggingFaceH4/open_llm_leaderboard) ================================================ FILE: docs/observability.md ================================================ # Observability ![agent](https://raw.githubusercontent.com/neuml/mlflow-txtai/master/images/agent.png) Observability enables tracking the inner workings of a system without having to change the system. This makes it much easier to debug and evaluate overall performance. `txtai` has an integration with [MLflow](https://mlflow.org) and its [tracing module](https://mlflow.org/docs/latest/llms/tracing/index.html) to provide insights into each of the components in `txtai`. ## Examples The following shows a number of examples on how to introduce observability into a `txtai` process. ### Initialization Run the following sections first to initialize tracing. ``` # Install MLflow plugin for txtai pip install mlflow-txtai # Start a local MLflow service mlflow server --host 127.0.0.1 --port 8000 ``` ```python import mlflow mlflow.set_tracking_uri(uri="http://localhost:8000") mlflow.set_experiment("txtai") # Enable txtai automatic tracing mlflow.txtai.autolog() ``` ### Textractor The first example traces a [Textractor pipeline](../pipeline/data/textractor). 
```python from txtai.pipeline import Textractor with mlflow.start_run(): textractor = Textractor() textractor("https://github.com/neuml/txtai") ``` ![textractor](https://raw.githubusercontent.com/neuml/mlflow-txtai/master/images/textractor.png) ### Embeddings Next, we'll trace an [Embeddings](../embeddings) query. ```python from txtai import Embeddings with mlflow.start_run(): wiki = Embeddings() wiki.load(provider="huggingface-hub", container="neuml/txtai-wikipedia-slim") embeddings = Embeddings(content=True, graph=True) embeddings.index(wiki.search("SELECT id, text FROM txtai LIMIT 25")) embeddings.search("MATCH (A)-[]->(B) RETURN A") ``` ![embeddings-load](https://raw.githubusercontent.com/neuml/mlflow-txtai/master/images/embeddings-load.png) ![embeddings-index](https://raw.githubusercontent.com/neuml/mlflow-txtai/master/images/embeddings-index.png) ### Retrieval Augmented Generation (RAG) The next example traces a [RAG pipeline](../pipeline/llm/rag). ```python from txtai import Embeddings, RAG with mlflow.start_run(): wiki = Embeddings() wiki.load(provider="huggingface-hub", container="neuml/txtai-wikipedia-slim") # Define prompt template template = """ Answer the following question using only the context below. Only include information specifically discussed. question: {question} context: {context} """ # Create RAG pipeline rag = RAG( wiki, "openai/gpt-oss-20b", system="You are a friendly assistant. You answer questions from users.", template=template, context=10 ) rag("Tell me about the Roman Empire", maxlength=2048) ``` ![rag](https://raw.githubusercontent.com/neuml/mlflow-txtai/master/images/rag.png) ### Workflow This example runs a [workflow](../workflow). This workflow runs an embeddings query and then translates each result to French. 
```python from txtai import Embeddings, Workflow from txtai.pipeline import Translation from txtai.workflow import Task with mlflow.start_run(): wiki = Embeddings() wiki.load(provider="huggingface-hub", container="neuml/txtai-wikipedia-slim") # Translation instance translate = Translation() workflow = Workflow([ Task(lambda x: [y[0]["text"] for y in wiki.batchsearch(x, 1)]), Task(lambda x: translate(x, "fr")) ]) print(list(workflow(["Roman Empire", "Greek Empire", "Industrial Revolution"]))) ``` ![workflow](https://raw.githubusercontent.com/neuml/mlflow-txtai/master/images/workflow.png) ### Agent The last example runs a [txtai agent](../agent) designed to research questions on astronomy. ```python from txtai import Agent, Embeddings def search(query): """ Searches a database of astronomy data. Make sure to call this tool only with a string input, never use JSON. Args: query: concepts to search for using similarity search Returns: list of search results with for each match """ return embeddings.search( "SELECT id, text, distance FROM txtai WHERE similar(:query)", 10, parameters={"query": query} ) embeddings = Embeddings() embeddings.load(provider="huggingface-hub", container="neuml/txtai-astronomy") agent = Agent( tools=[search], llm="Qwen/Qwen3-4B-Instruct-2507", max_steps=10, ) researcher = """ {command} Do the following. - Search for results related to the topic. - Analyze the results - Continue querying until conclusive answers are found - Write a Markdown report """ with mlflow.start_run(): agent(researcher.format(command=""" Write a detailed list with explanations of 10 candidate stars that could potentially be habitable to life. """), maxlength=16000) ``` ![agent](https://raw.githubusercontent.com/neuml/mlflow-txtai/master/images/agent.png) ## Read more Check out the [mlflow-txtai](https://github.com/neuml/mlflow-txtai) project to see more examples. 
================================================ FILE: docs/overrides/main.html ================================================ {% extends "base.html" %} {% block extrahead %} {% set title = config.site_name %} {% if page and page.meta and page.meta.title %} {% set title = title ~ " - " ~ page.meta.title %} {% elif page and page.title and not page.is_homepage %} {% set title = title ~ " - " ~ page.title %} {% endif %} {% endblock %} ================================================ FILE: docs/pipeline/audio/audiomixer.md ================================================ # Audio Mixer ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Audio Mixer pipeline mixes multiple audio streams into a single stream. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import AudioMixer # Create and run pipeline mixer = AudioMixer() mixer(((audio1, rate1), (audio2, rate2))) ``` See the link below for a more detailed example. | Notebook | Description | | |:----------|:-------------|------:| | [Generative Audio](https://github.com/neuml/txtai/blob/master/examples/66_Generative_Audio.ipynb) | Storytelling with generative audio workflows | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/66_Generative_Audio.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). 
### config.yml ```yaml # Create pipeline using lower case class name audiomixer: # Run pipeline with workflow workflow: audiomixer: tasks: - action: audiomixer ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("audiomixer", [[[audio1, rate1], [audio2, rate2]]])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"audiomixer", "elements":[[[audio1, rate1], [audio2, rate2]]]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.AudioMixer.__init__ ### ::: txtai.pipeline.AudioMixer.__call__ ================================================ FILE: docs/pipeline/audio/audiostream.md ================================================ # Audio Stream ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Audio Stream pipeline is a threaded pipeline that plays audio segments. This pipeline is designed to run on local machines given that it requires access to write to an output device. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import AudioStream # Create and run pipeline audio = AudioStream() audio(data) ``` This pipeline may require additional system dependencies. See [this section](../../../install#environment-specific-prerequisites) for more. See the link below for a more detailed example. 
| Notebook | Description | | |:----------|:-------------|------:| | [Speech to Speech RAG](https://github.com/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) [▶️](https://www.youtube.com/watch?v=tH8QWwkVMKA) | Full cycle speech to speech workflow with RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name audiostream: # Run pipeline with workflow workflow: audiostream: tasks: - action: audiostream ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("audiostream", [["numpy data", "sample rate"]])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"audiostream", "elements":[["numpy data", "sample rate"]]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.AudioStream.__init__ ### ::: txtai.pipeline.AudioStream.__call__ ================================================ FILE: docs/pipeline/audio/microphone.md ================================================ # Microphone ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Microphone pipeline reads input speech from a microphone device. This pipeline is designed to run on local machines given that it requires access to read from an input device. 
## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import Microphone # Create and run pipeline microphone = Microphone() microphone() ``` This pipeline may require additional system dependencies. See [this section](../../../install#environment-specific-prerequisites) for more. See the link below for a more detailed example. | Notebook | Description | | |:----------|:-------------|------:| | [Speech to Speech RAG](https://github.com/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) [▶️](https://www.youtube.com/watch?v=tH8QWwkVMKA) | Full cycle speech to speech workflow with RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name microphone: # Run pipeline with workflow workflow: microphone: tasks: - action: microphone ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("microphone", ["1"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"microphone", "elements":["1"]}' ``` ## Methods Python documentation for the pipeline. 
### ::: txtai.pipeline.Microphone.__init__ ### ::: txtai.pipeline.Microphone.__call__ ================================================ FILE: docs/pipeline/audio/texttoaudio.md ================================================ # Text To Audio ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Text To Audio pipeline generates audio from text. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import TextToAudio # Create and run pipeline tta = TextToAudio() tta("Describe the audio to generate here") ``` See the link below for a more detailed example. | Notebook | Description | | |:----------|:-------------|------:| | [Generative Audio](https://github.com/neuml/txtai/blob/master/examples/66_Generative_Audio.ipynb) | Storytelling with generative audio workflows | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/66_Generative_Audio.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). 
### config.yml ```yaml # Create pipeline using lower case class name texttoaudio: # Run pipeline with workflow workflow: tta: tasks: - action: texttoaudio ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("tta", ["Describe the audio to generate here"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"tta", "elements":["Describe the audio to generate here"]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.TextToAudio.__init__ ### ::: txtai.pipeline.TextToAudio.__call__ ================================================ FILE: docs/pipeline/audio/texttospeech.md ================================================ # Text To Speech ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Text To Speech pipeline generates speech from text. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import TextToSpeech # Create and run pipeline with default model tts = TextToSpeech() tts("Say something here") # Stream audio - incrementally generates snippets of audio yield from tts( "Say something here. And say something else.".split(), stream=True ) # Generate audio using a speaker id tts = TextToSpeech("neuml/vctk-vits-onnx") tts("Say something here", speaker=15) # Generate audio using speaker embeddings tts = TextToSpeech("neuml/txtai-speecht5-onnx") tts("Say something here", speaker=np.array(...)) ``` See the links below for a more detailed example. 
| Notebook | Description | | |:----------|:-------------|------:| | [Text to speech generation](https://github.com/neuml/txtai/blob/master/examples/40_Text_to_Speech_Generation.ipynb) | Generate speech from text | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/40_Text_to_Speech_Generation.ipynb) | | [Speech to Speech RAG](https://github.com/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) [▶️](https://www.youtube.com/watch?v=tH8QWwkVMKA) | Full cycle speech to speech workflow with RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) | | [Generative Audio](https://github.com/neuml/txtai/blob/master/examples/66_Generative_Audio.ipynb) | Storytelling with generative audio workflows | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/66_Generative_Audio.ipynb) | This pipeline is backed by ONNX models from the Hugging Face Hub. The following models are currently available. - [kokoro-base-onnx](https://huggingface.co/NeuML/kokoro-base-onnx) | [fp16](https://huggingface.co/NeuML/kokoro-fp16-onnx) | [int8](https://huggingface.co/NeuML/kokoro-int8-onnx) - [ljspeech-jets-onnx](https://huggingface.co/NeuML/ljspeech-jets-onnx) - [ljspeech-vits-onnx](https://huggingface.co/NeuML/ljspeech-vits-onnx) - [vctk-vits-onnx](https://huggingface.co/NeuML/vctk-vits-onnx) - [txtai-speecht5-onnx](https://huggingface.co/NeuML/txtai-speecht5-onnx) ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. 
Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name texttospeech: # Run pipeline with workflow workflow: tts: tasks: - action: texttospeech ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("tts", ["Say something here"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"tts", "elements":["Say something here"]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.TextToSpeech.__init__ ### ::: txtai.pipeline.TextToSpeech.__call__ ================================================ FILE: docs/pipeline/audio/transcription.md ================================================ # Transcription ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Transcription pipeline converts speech in audio files to text. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import Transcription # Create and run pipeline transcribe = Transcription() transcribe("path to wav file") ``` This pipeline may require additional system dependencies. See [this section](../../../install#environment-specific-prerequisites) for more. See the links below for a more detailed example. 
| Notebook | Description | | |:----------|:-------------|------:| | [Transcribe audio to text](https://github.com/neuml/txtai/blob/master/examples/11_Transcribe_audio_to_text.ipynb) | Convert audio files to text | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/11_Transcribe_audio_to_text.ipynb) | | [Speech to Speech RAG](https://github.com/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) [▶️](https://www.youtube.com/watch?v=tH8QWwkVMKA) | Full cycle speech to speech workflow with RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name transcription: # Run pipeline with workflow workflow: transcribe: tasks: - action: transcription ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("transcribe", ["path to wav file"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"transcribe", "elements":["path to wav file"]}' ``` ## Methods Python documentation for the pipeline. 
### ::: txtai.pipeline.Transcription.__init__ ### ::: txtai.pipeline.Transcription.__call__ ================================================ FILE: docs/pipeline/data/filetohtml.md ================================================ # File To HTML ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The File To HTML pipeline transforms files to HTML. It supports the following text extraction backends. ## Apache Tika [Apache Tika](https://tika.apache.org/) detects and extracts metadata and text from over a thousand different file types. See [this link](https://tika.apache.org/2.9.2/formats.html) for a list of supported document formats. Apache Tika requires [Java](https://en.wikipedia.org/wiki/Java_(programming_language)) to be installed. An alternative to that is starting a separate Apache Tika service via [this Docker Image](https://hub.docker.com/r/apache/tika) and setting these [environment variables](https://github.com/chrismattmann/tika-python?tab=readme-ov-file#environment-variables). ## Docling [Docling](https://github.com/DS4SD/docling) parses documents and exports them to the desired format with ease and speed. This is a library that has rapidly gained popularity starting in late 2024. Docling excels in parsing formatting elements from PDFs (tables, sections etc). See [this link](https://github.com/DS4SD/docling?tab=readme-ov-file#features) for a list of supported document formats. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import FileToHTML # Create and run pipeline html = FileToHTML() html("/path/to/file") ``` ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. 
Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name filetohtml: # Run pipeline with workflow workflow: html: tasks: - action: filetohtml ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("html", ["/path/to/file"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"html", "elements":["/path/to/file"]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.FileToHTML.__init__ ### ::: txtai.pipeline.FileToHTML.__call__ ================================================ FILE: docs/pipeline/data/htmltomd.md ================================================ # HTML To Markdown ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The HTML To Markdown pipeline transforms HTML to Markdown. Markdown formatting is applied for headings, blockquotes, lists, code, tables and text. Visual formatting is also included (bold, italic etc). This pipeline searches for the best node that has relevant text, often found with an `article`, `main` or `body` tag. The HTML to Markdown pipeline requires the [BeautifulSoup4](https://pypi.org/project/beautifulsoup4/) library to be installed. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import HTMLToMarkdown # Create and run pipeline md = HTMLToMarkdown() md("This is a test") ``` ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. 
Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name htmltomarkdown: # Run pipeline with workflow workflow: markdown: tasks: - action: htmltomarkdown ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("markdown", ["This is a test"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"markdown", "elements":["This is a test"]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.HTMLToMarkdown.__init__ ### ::: txtai.pipeline.HTMLToMarkdown.__call__ ================================================ FILE: docs/pipeline/data/segmentation.md ================================================ # Segmentation ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Segmentation pipeline segments text into semantic units. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import Segmentation # Create and run pipeline segment = Segmentation(sentences=True) segment("This is a test. And another test.") # Load third-party chunkers segment = Segmentation(chunker="semantic") segment("This is a test. And another test.") ``` The Segmentation pipeline supports segmenting `sentences`, `lines`, `paragraphs` and `sections` using a rules-based approach. Each of these modes can be set when creating the pipeline. Third-party chunkers are also supported via the `chunker` parameter. ## Configuration-driven example Pipelines are run with Python or configuration. 
Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name segmentation: sentences: true # Run pipeline with workflow workflow: segment: tasks: - action: segmentation ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("segment", ["This is a test. And another test."])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"segment", "elements":["This is a test. And another test."]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.Segmentation.__init__ ### ::: txtai.pipeline.Segmentation.__call__ ================================================ FILE: docs/pipeline/data/tabular.md ================================================ # Tabular ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Tabular pipeline splits tabular data into rows and columns. The tabular pipeline is most useful in creating (id, text, tag) tuples to load into Embedding indexes. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import Tabular # Create and run pipeline tabular = Tabular("id", ["text"]) tabular("path to csv file") ``` See the link below for a more detailed example. 
| Notebook | Description | | |:----------|:-------------|------:| | [Transform tabular data with composable workflows](https://github.com/neuml/txtai/blob/master/examples/22_Transform_tabular_data_with_composable_workflows.ipynb) | Transform, index and search tabular data | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/22_Transform_tabular_data_with_composable_workflows.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name tabular: idcolumn: id textcolumns: - text # Run pipeline with workflow workflow: tabular: tasks: - action: tabular ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("tabular", ["path to csv file"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"tabular", "elements":["path to csv file"]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.Tabular.__init__ ### ::: txtai.pipeline.Tabular.__call__ ================================================ FILE: docs/pipeline/data/textractor.md ================================================ # Textractor ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Textractor pipeline extracts and splits text from documents. This pipeline extends the [Segmentation](../segmentation) pipeline. 
Each document goes through the following process. - Content is retrieved if it's not local - If the document `mime-type` isn't plain text or HTML, it's converted to HTML via the [FileToHTML](../filetohtml) pipeline - HTML is converted to Markdown via the [HTMLToMarkdown](../htmltomd) pipeline - Content is split/chunked based on the [segmentation parameters](../segmentation/#txtai.pipeline.Segmentation.__init__) and returned The [backend](../filetohtml/#txtai.pipeline.FileToHTML.__init__) parameter sets the FileToHTML pipeline backend. If a backend isn't available, this pipeline assumes input is HTML content and only converts it to Markdown. See the [FileToHTML](../filetohtml) and [HTMLToMarkdown](../htmltomd) pipelines to learn more on the dependencies necessary for each of those pipelines. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import Textractor # Create and run pipeline textract = Textractor() textract("https://github.com/neuml/txtai") ``` See the links below for more detailed examples. | Notebook | Description | | |:----------|:-------------|------:| | [Extract text from documents](https://github.com/neuml/txtai/blob/master/examples/10_Extract_text_from_documents.ipynb) | Extract text from PDF, Office, HTML and more | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/10_Extract_text_from_documents.ipynb) | | [Chunking your data for RAG](https://github.com/neuml/txtai/blob/master/examples/73_Chunking_your_data_for_RAG.ipynb) | Extract, chunk and index content for effective retrieval | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/73_Chunking_your_data_for_RAG.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. 
Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name textractor: # Run pipeline with workflow workflow: textract: tasks: - action: textractor ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("textract", ["https://github.com/neuml/txtai"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"textract", "elements":["https://github.com/neuml/txtai"]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.Textractor.__init__ ### ::: txtai.pipeline.Textractor.__call__ ================================================ FILE: docs/pipeline/data/tokenizer.md ================================================ # Tokenizer ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Tokenizer pipeline splits text into tokens. This is primarily used for keyword / term indexing. _Note: Transformers-based models have their own tokenizers and this pipeline isn't designed for working with Transformers models._ ## Example The following shows a simple example using this pipeline. 
```python from txtai.pipeline import Tokenizer # Create and run pipeline tokenizer = Tokenizer() tokenizer("text to tokenize") # Whitespace tokenization tokenizer = Tokenizer(whitespace=True) tokenizer("text to tokenize") # Tokenize using a regular expression tokenizer = Tokenizer(regexp=r"\w{5,}") tokenizer("text to tokenize") # Tokenize into trigrams like pg_trgm tokenizer = Tokenizer(ngrams={ "ngrams": 3, "lpad": " ", "rpad": " ", "unique": True }) tokenizer("text to tokenize") # Tokenize into edge ngrams tokenizer = Tokenizer(ngrams={"nmin": 2, "nmax": 5, "edge": True}) tokenizer("text to tokenize") ``` ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name tokenizer: # Run pipeline with workflow workflow: tokenizer: tasks: - action: tokenizer ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("tokenizer", ["text to tokenize"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"tokenizer", "elements":["text"]}' ``` ## Methods Python documentation for the pipeline. 
### ::: txtai.pipeline.Tokenizer.__init__ ### ::: txtai.pipeline.Tokenizer.__call__ ================================================ FILE: docs/pipeline/image/caption.md ================================================ # Caption ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The caption pipeline reads a list of images and returns a list of captions for those images. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import Caption # Create and run pipeline caption = Caption() caption("path to image file") ``` See the link below for a more detailed example. | Notebook | Description | | |:----------|:-------------|------:| | [Generate image captions and detect objects](https://github.com/neuml/txtai/blob/master/examples/25_Generate_image_captions_and_detect_objects.ipynb) | Captions and object detection for images | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/25_Generate_image_captions_and_detect_objects.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). 
### config.yml ```yaml # Create pipeline using lower case class name caption: # Run pipeline with workflow workflow: caption: tasks: - action: caption ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("caption", ["path to image file"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"caption", "elements":["path to image file"]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.Caption.__init__ ### ::: txtai.pipeline.Caption.__call__ ================================================ FILE: docs/pipeline/image/imagehash.md ================================================ # ImageHash ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The image hash pipeline generates perceptual image hashes. These hashes can be used to detect near-duplicate images. This method is not backed by machine learning models and not intended to find conceptually similar images. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import ImageHash # Create and run pipeline ihash = ImageHash() ihash("path to image file") ``` See the link below for a more detailed example. | Notebook | Description | | |:----------|:-------------|------:| | [Near duplicate image detection](https://github.com/neuml/txtai/blob/master/examples/31_Near_duplicate_image_detection.ipynb) | Identify duplicate and near-duplicate images | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/31_Near_duplicate_image_detection.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. 
Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name imagehash: # Run pipeline with workflow workflow: imagehash: tasks: - action: imagehash ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("imagehash", ["path to image file"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"imagehash", "elements":["path to image file"]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.ImageHash.__init__ ### ::: txtai.pipeline.ImageHash.__call__ ================================================ FILE: docs/pipeline/image/objects.md ================================================ # Objects ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Objects pipeline reads a list of images and returns a list of detected objects. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import Objects # Create and run pipeline objects = Objects() objects("path to image file") ``` See the link below for a more detailed example. 
| Notebook | Description | | |:----------|:-------------|------:| | [Generate image captions and detect objects](https://github.com/neuml/txtai/blob/master/examples/25_Generate_image_captions_and_detect_objects.ipynb) | Captions and object detection for images | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/25_Generate_image_captions_and_detect_objects.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name objects: # Run pipeline with workflow workflow: objects: tasks: - action: objects ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("objects", ["path to image file"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"objects", "elements":["path to image file"]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.Objects.__init__ ### ::: txtai.pipeline.Objects.__call__ ================================================ FILE: docs/pipeline/index.md ================================================ # Pipeline ![pipeline](../images/pipeline.png#only-light) ![pipeline](../images/pipeline-dark.png#only-dark) txtai provides a generic pipeline processing framework with the only interface requirement being a `__call__` method. Pipelines are flexible and process various types of data. 
Pipelines can wrap machine learning models as well as other processes. Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../workflow/#configuration-driven-example) or the [API](../api#local-instance). ## List of pipelines The following is a list of the current pipelines available in txtai. All pipelines use default models when not otherwise specified. See the [model guide](../models) for the current model recommendations. All pipelines are designed to work with local models via the [Transformers library](https://github.com/huggingface/transformers). The `LLM` and `RAG` pipelines also have integrations for [llama.cpp](https://github.com/abetlen/llama-cpp-python) and [hosted API models via LiteLLM](https://github.com/BerriAI/litellm). The `LLM` pipeline can be prompted to accomplish many of the same tasks (e.g. summarization, translation, classification). 
- Audio - [AudioMixer](audio/audiomixer) - [AudioStream](audio/audiostream) - [Microphone](audio/microphone) - [TextToAudio](audio/texttoaudio) - [TextToSpeech](audio/texttospeech) - [Transcription](audio/transcription) - Data Processing - [FileToHTML](data/filetohtml) - [HTMLToMarkdown](data/htmltomd) - [Segmentation](data/segmentation) - [Tabular](data/tabular) - [Text extraction](data/textractor) - [Tokenizer](data/tokenizer) - Image - [Caption](image/caption) - [Image Hash](image/imagehash) - [Objects](image/objects) - Text - [Entity](text/entity) - [Labeling](text/labels) - [LLM](llm/llm) - [RAG](llm/rag) - [Reranker](text/reranker) - [Similarity](text/similarity) - [Summary](text/summary) - [Translation](text/translation) - Training - [HF ONNX](train/hfonnx) - [ML ONNX](train/mlonnx) - [Trainer](train/trainer) ================================================ FILE: docs/pipeline/llm/llm.md ================================================ # LLM ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The LLM pipeline runs prompts through a large language model (LLM). This pipeline autodetects the LLM framework based on the model path. ## Example The following shows a simple example using this pipeline. ```python from txtai import LLM # Create LLM pipeline llm = LLM() # Run prompt llm( """ Answer the following question using the provided context. Question: What are the applications of txtai? Context: txtai is an open-source platform for semantic search and workflows powered by language models. 
""" ) # Prompts with chat templating can be directly passed # The template format varies by model llm( """ <|im_start|>system You are a friendly assistant.<|im_end|> <|im_start|>user Answer the following question...<|im_end|> <|im_start|>assistant """ ) # Chat messages automatically handle templating llm([ {"role": "system", "content": "You are a friendly assistant."}, {"role": "user", "content": "Answer the following question..."} ]) # When there is no system prompt passed to instruction tuned models # the default role is inferred `defaultrole="auto"` llm("Answer the following question...") # To always generate chat messages for string inputs llm("Answer the following question...", defaultrole="user") # To never generate chat messages for string inputs llm("Answer the following question...", defaultrole="prompt") ``` The LLM pipeline automatically detects the underlying LLM framework. This can also be manually set. The following methods are supported. - [Hugging Face Transformers](https://github.com/huggingface/transformers) - [llama.cpp](https://github.com/abetlen/llama-cpp-python) - [LLM APIs via LiteLLM](https://github.com/BerriAI/litellm) - [OpenCode server](https://github.com/anomalyco/opencode) `llama.cpp` models support both local and remote GGUF paths on the HF Hub. See the [LiteLLM documentation](https://litellm.vercel.app/docs/providers) for the options available with LiteLLM models. See the [OpenCode documentation](https://opencode.ai/docs/server/) for more on how to integrate the LLM pipeline with a running OpenCode instance. 
```python from txtai import LLM # Transformers llm = LLM("openai/gpt-oss-20b") llm = LLM("openai/gpt-oss-20b", method="transformers") # llama.cpp llm = LLM("unsloth/gpt-oss-20b-GGUF/gpt-oss-20b-Q4_K_M.gguf") llm = LLM("unsloth/gpt-oss-20b-GGUF/gpt-oss-20b-Q4_K_M.gguf", method="llama.cpp") # LiteLLM llm = LLM("ollama/gpt-oss") llm = LLM("ollama/gpt-oss", method="litellm") # Custom Ollama endpoint llm = LLM("ollama/gpt-oss", api_base="http://localhost:11434") # Custom OpenAI-compatible endpoint llm = LLM("openai/gpt-oss", api_base="http://localhost:4000") # LLM APIs - must also set API key via environment variable llm = LLM("gpt-5.2") llm = LLM("claude-opus-4-5-20251101") llm = LLM("gemini/gemini-3-pro-preview") # Local OpenCode server started via `opencode serve` llm = LLM("opencode") llm = LLM("opencode/big-pickle", url="http://localhost:4000") ``` Models can be externally loaded and passed to pipelines. This is useful for models that are not yet supported by Transformers and/or need special initialization. ```python import torch from transformers import AutoModelForCausalLM, AutoTokenizer from txtai import LLM # Load Qwen3 0.6B path = "Qwen/Qwen3-0.6B" model = AutoModelForCausalLM.from_pretrained( path, dtype=torch.bfloat16, ) tokenizer = AutoTokenizer.from_pretrained(path) llm = LLM((model, tokenizer)) ``` See the links below for more detailed examples. 
| Notebook | Description | | |:----------|:-------------|------:| | [Prompt-driven search with LLMs](https://github.com/neuml/txtai/blob/master/examples/42_Prompt_driven_search_with_LLMs.ipynb) | Embeddings-guided and Prompt-driven search with Large Language Models (LLMs) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/42_Prompt_driven_search_with_LLMs.ipynb) | | [Prompt templates and task chains](https://github.com/neuml/txtai/blob/master/examples/44_Prompt_templates_and_task_chains.ipynb) | Build model prompts and connect tasks together with workflows | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/44_Prompt_templates_and_task_chains.ipynb) | | [Build RAG pipelines with txtai](https://github.com/neuml/txtai/blob/master/examples/52_Build_RAG_pipelines_with_txtai.ipynb) [▶️](https://www.youtube.com/watch?v=t_OeAc8NVfQ) | Guide on retrieval augmented generation including how to create citations | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/52_Build_RAG_pipelines_with_txtai.ipynb) | | [Integrate LLM frameworks](https://github.com/neuml/txtai/blob/master/examples/53_Integrate_LLM_Frameworks.ipynb) | Integrate llama.cpp, LiteLLM and custom generation frameworks | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/53_Integrate_LLM_Frameworks.ipynb) | | [Generate knowledge with Semantic Graphs and RAG](https://github.com/neuml/txtai/blob/master/examples/55_Generate_knowledge_with_Semantic_Graphs_and_RAG.ipynb) | Knowledge exploration and discovery with Semantic Graphs and RAG | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/55_Generate_knowledge_with_Semantic_Graphs_and_RAG.ipynb) | | [Build knowledge graphs with LLMs](https://github.com/neuml/txtai/blob/master/examples/57_Build_knowledge_graphs_with_LLM_driven_entity_extraction.ipynb) | Build knowledge graphs with LLM-driven entity extraction | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/57_Build_knowledge_graphs_with_LLM_driven_entity_extraction.ipynb) | | [Advanced RAG with graph path traversal](https://github.com/neuml/txtai/blob/master/examples/58_Advanced_RAG_with_graph_path_traversal.ipynb) | Graph path traversal to collect complex sets of data for advanced RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/58_Advanced_RAG_with_graph_path_traversal.ipynb) | | [Advanced RAG with guided generation](https://github.com/neuml/txtai/blob/master/examples/60_Advanced_RAG_with_guided_generation.ipynb) | Retrieval Augmented and Guided Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/60_Advanced_RAG_with_guided_generation.ipynb) | | [RAG with llama.cpp and external API services](https://github.com/neuml/txtai/blob/master/examples/62_RAG_with_llama_cpp_and_external_API_services.ipynb) | RAG with additional vector and LLM frameworks | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/62_RAG_with_llama_cpp_and_external_API_services.ipynb) | | [How RAG with txtai works](https://github.com/neuml/txtai/blob/master/examples/63_How_RAG_with_txtai_works.ipynb) | Create RAG processes, API 
services and Docker instances | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/63_How_RAG_with_txtai_works.ipynb) | | [Speech to Speech RAG](https://github.com/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) [▶️](https://www.youtube.com/watch?v=tH8QWwkVMKA) | Full cycle speech to speech workflow with RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) | | [Analyzing Hugging Face Posts with Graphs and Agents](https://github.com/neuml/txtai/blob/master/examples/68_Analyzing_Hugging_Face_Posts_with_Graphs_and_Agents.ipynb) | Explore a rich dataset with Graph Analysis and Agents | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/68_Analyzing_Hugging_Face_Posts_with_Graphs_and_Agents.ipynb) | | [Granting autonomy to agents](https://github.com/neuml/txtai/blob/master/examples/69_Granting_autonomy_to_agents.ipynb) | Agents that iteratively solve problems as they see fit | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/69_Granting_autonomy_to_agents.ipynb) | | [Getting started with LLM APIs](https://github.com/neuml/txtai/blob/master/examples/70_Getting_started_with_LLM_APIs.ipynb) | Generate embeddings and run LLMs with OpenAI, Claude, Gemini, Bedrock and more | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/70_Getting_started_with_LLM_APIs.ipynb) | | [Analyzing LinkedIn Company Posts with Graphs and 
Agents](https://github.com/neuml/txtai/blob/master/examples/71_Analyzing_LinkedIn_Company_Posts_with_Graphs_and_Agents.ipynb) | Exploring how to improve social media engagement with AI | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/71_Analyzing_LinkedIn_Company_Posts_with_Graphs_and_Agents.ipynb) | | [Parsing the stars with txtai](https://github.com/neuml/txtai/blob/master/examples/72_Parsing_the_stars_with_txtai.ipynb) | Explore an astronomical knowledge graph of known stars, planets, galaxies | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/72_Parsing_the_stars_with_txtai.ipynb) | | [Chunking your data for RAG](https://github.com/neuml/txtai/blob/master/examples/73_Chunking_your_data_for_RAG.ipynb) | Extract, chunk and index content for effective retrieval | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/73_Chunking_your_data_for_RAG.ipynb) | | [Medical RAG Research with txtai](https://github.com/neuml/txtai/blob/master/examples/75_Medical_RAG_Research_with_txtai.ipynb) | Analyze PubMed article metadata with RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/75_Medical_RAG_Research_with_txtai.ipynb) | | [GraphRAG with Wikipedia and GPT OSS](https://github.com/neuml/txtai/blob/master/examples/77_GraphRAG_with_Wikipedia_and_GPT_OSS.ipynb) | Deep graph search powered RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/77_GraphRAG_with_Wikipedia_and_GPT_OSS.ipynb) | | [RAG is more than Vector 
Search](https://github.com/neuml/txtai/blob/master/examples/79_RAG_is_more_than_Vector_Search.ipynb) | Context retrieval via Web, SQL and other sources | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/79_RAG_is_more_than_Vector_Search.ipynb) | | [OpenCode as a txtai LLM](https://github.com/neuml/txtai/blob/master/examples/81_OpenCode_as_a_txtai_LLM.ipynb) | Integrate OpenCode with the txtai ecosystem | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/81_OpenCode_as_a_txtai_LLM.ipynb) | | [Agentic College Search](https://github.com/neuml/txtai/blob/master/examples/82_Agentic_College_Search.ipynb) | Identify a list of strong engineering colleges | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/82_Agentic_College_Search.ipynb) | | [TxtAI got skills](https://github.com/neuml/txtai/blob/master/examples/83_TxtAI_got_skills.ipynb) | Integrate skill.md files with your agent | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/83_TxtAI_got_skills.ipynb) | | [Agent Tools](https://github.com/neuml/txtai/blob/master/examples/84_Agent_Tools.ipynb) [▶️](https://www.youtube.com/watch?v=RDNaFXQy3GQ) | Learn about the txtai agent toolkit | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/84_Agent_Tools.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. 
Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name llm: # Run pipeline with workflow workflow: llm: tasks: - action: llm ``` Similar to the Python example above, the underlying [Hugging Face pipeline parameters](https://huggingface.co/docs/transformers/main/main_classes/pipelines#transformers.pipeline.model) and [model parameters](https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoModel.from_pretrained) can be set in pipeline configuration. ```yaml llm: path: Qwen/Qwen3-0.6B dtype: torch.bfloat16 ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("llm", [ """ Answer the following question using the provided context. Question: What are the applications of txtai? Context: txtai is an open-source platform for semantic search and workflows powered by language models. """ ])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"llm", "elements": ["Answer the following question..."]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.LLM.__init__ ### ::: txtai.pipeline.LLM.__call__ ================================================ FILE: docs/pipeline/llm/rag.md ================================================ # RAG ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Retrieval Augmented Generation (RAG) pipeline joins a prompt, context data store and generative model together to extract knowledge. The data store can be an embeddings database or a similarity instance with associated input text. 
The generative model can be a prompt-driven large language model (LLM), an extractive question-answering model or a custom pipeline. ## Example The following shows a simple example using this pipeline. ```python from txtai import Embeddings, RAG # Input data data = [ "US tops 5 million confirmed virus cases", "Canada's last fully intact ice shelf has suddenly collapsed, " + "forming a Manhattan-sized iceberg", "Beijing mobilises invasion craft along coast as Taiwan tensions escalate", "The National Park Service warns against sacrificing slower friends " + "in a bear attack", "Maine man wins $1M from $25 lottery ticket", "Make huge profits without work, earn up to $100,000 a day" ] # Build embeddings index embeddings = Embeddings(content=True) embeddings.index(data) # Create the RAG pipeline rag = RAG(embeddings, "Qwen/Qwen3-0.6B", template=""" Answer the following question using the provided context. Question: {question} Context: {context} """) # Run RAG pipeline rag("What was won?") # Prompts with chat templating can be directly passed # The template format varies by model rag = RAG(embeddings, "Qwen/Qwen3-0.6B", template=""" <|im_start|>system You are a friendly assistant.<|im_end|> <|im_start|>user Answer the following question using the provided context. Question: {question} Context: {context} <|im_start|>assistant """ ) rag("What was won?") # Inputs are automatically converted to chat messages when a # system prompt is provided rag = RAG( embeddings, "openai/gpt-oss-20b", system="You are a friendly assistant", template=""" Answer the following question using the provided context. 
Question: {question} Context: {context} """) rag("What was won?") # LLM options can be passed as additional arguments # - Streaming RAG response with `stream=True` # - String inputs are always converted to user messages with `defaultrole="user"` # - Thinking text is removed with `stripthink=True` rag("What was won?", stream=True, defaultrole="user", stripthink=True) ``` See the [Embeddings](../../../embeddings) and [LLM](../llm) pages for additional configuration options. Check out this [RAG Quickstart Example](https://github.com/neuml/txtai/blob/master/examples/rag_quickstart.py). Additional examples are listed below. | Notebook | Description | | |:----------|:-------------|------:| | [Prompt-driven search with LLMs](https://github.com/neuml/txtai/blob/master/examples/42_Prompt_driven_search_with_LLMs.ipynb) | Embeddings-guided and Prompt-driven search with Large Language Models (LLMs) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/42_Prompt_driven_search_with_LLMs.ipynb) | | [Build RAG pipelines with txtai](https://github.com/neuml/txtai/blob/master/examples/52_Build_RAG_pipelines_with_txtai.ipynb) [▶️](https://www.youtube.com/watch?v=t_OeAc8NVfQ) | Guide on retrieval augmented generation including how to create citations | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/52_Build_RAG_pipelines_with_txtai.ipynb) | | [Integrate LLM frameworks](https://github.com/neuml/txtai/blob/master/examples/53_Integrate_LLM_Frameworks.ipynb) | Integrate llama.cpp, LiteLLM and custom generation frameworks | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/53_Integrate_LLM_Frameworks.ipynb) | | [Generate knowledge with Semantic Graphs and
RAG](https://github.com/neuml/txtai/blob/master/examples/55_Generate_knowledge_with_Semantic_Graphs_and_RAG.ipynb) | Knowledge exploration and discovery with Semantic Graphs and RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/55_Generate_knowledge_with_Semantic_Graphs_and_RAG.ipynb) | | [Advanced RAG with graph path traversal](https://github.com/neuml/txtai/blob/master/examples/58_Advanced_RAG_with_graph_path_traversal.ipynb) | Graph path traversal to collect complex sets of data for advanced RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/58_Advanced_RAG_with_graph_path_traversal.ipynb) | | [Advanced RAG with guided generation](https://github.com/neuml/txtai/blob/master/examples/60_Advanced_RAG_with_guided_generation.ipynb) | Retrieval Augmented and Guided Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/60_Advanced_RAG_with_guided_generation.ipynb) | | [RAG with llama.cpp and external API services](https://github.com/neuml/txtai/blob/master/examples/62_RAG_with_llama_cpp_and_external_API_services.ipynb) | RAG with additional vector and LLM frameworks | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/62_RAG_with_llama_cpp_and_external_API_services.ipynb) | | [How RAG with txtai works](https://github.com/neuml/txtai/blob/master/examples/63_How_RAG_with_txtai_works.ipynb) | Create RAG processes, API services and Docker instances | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/63_How_RAG_with_txtai_works.ipynb) | | [Speech to Speech 
RAG](https://github.com/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) [▶️](https://www.youtube.com/watch?v=tH8QWwkVMKA) | Full cycle speech to speech workflow with RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) | | [Parsing the stars with txtai](https://github.com/neuml/txtai/blob/master/examples/72_Parsing_the_stars_with_txtai.ipynb) | Explore an astronomical knowledge graph of known stars, planets, galaxies | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/72_Parsing_the_stars_with_txtai.ipynb) | | [Chunking your data for RAG](https://github.com/neuml/txtai/blob/master/examples/73_Chunking_your_data_for_RAG.ipynb) | Extract, chunk and index content for effective retrieval | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/73_Chunking_your_data_for_RAG.ipynb) | | [Medical RAG Research with txtai](https://github.com/neuml/txtai/blob/master/examples/75_Medical_RAG_Research_with_txtai.ipynb) | Analyze PubMed article metadata with RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/75_Medical_RAG_Research_with_txtai.ipynb) | | [GraphRAG with Wikipedia and GPT OSS](https://github.com/neuml/txtai/blob/master/examples/77_GraphRAG_with_Wikipedia_and_GPT_OSS.ipynb) | Deep graph search powered RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/77_GraphRAG_with_Wikipedia_and_GPT_OSS.ipynb) | | [RAG is more than Vector 
Search](https://github.com/neuml/txtai/blob/master/examples/79_RAG_is_more_than_Vector_Search.ipynb) | Context retrieval via Web, SQL and other sources | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/79_RAG_is_more_than_Vector_Search.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Allow documents to be indexed writable: True # Content is required for RAG pipeline embeddings: content: True rag: path: Qwen/Qwen3-0.6B template: | Answer the following question using the provided context. Question: {question} Context: {context} workflow: search: tasks: - action: rag ``` ### Run with Workflows Built-in tasks make using the RAG pipeline easier. ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") app.add([ "US tops 5 million confirmed virus cases", "Canada's last fully intact ice shelf has suddenly collapsed, " + "forming a Manhattan-sized iceberg", "Beijing mobilises invasion craft along coast as Taiwan tensions escalate", "The National Park Service warns against sacrificing slower friends " + "in a bear attack", "Maine man wins $1M from $25 lottery ticket", "Make huge profits without work, earn up to $100,000 a day" ]) app.index() list(app.workflow("search", ["What was won?"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name": "search", "elements": ["What was won"]}' ``` ## Methods Python documentation for the pipeline.
### ::: txtai.pipeline.RAG.__init__ ### ::: txtai.pipeline.RAG.__call__ ================================================ FILE: docs/pipeline/text/entity.md ================================================ # Entity ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Entity pipeline applies a token classifier to text and extracts entity/label combinations. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import Entity # Create and run pipeline entity = Entity() entity("Canada's last fully intact ice shelf has suddenly collapsed, " \ "forming a Manhattan-sized iceberg") # Extract entities using a GLiNER model which supports dynamic labels entity = Entity("gliner-community/gliner_medium-v2.5") entity("Canada's last fully intact ice shelf has suddenly collapsed, " \ "forming a Manhattan-sized iceberg", labels=["country", "city"]) ``` See the link below for a more detailed example. | Notebook | Description | | |:----------|:-------------|------:| | [Entity extraction workflows](https://github.com/neuml/txtai/blob/master/examples/26_Entity_extraction_workflows.ipynb) | Identify entity/label combinations | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/26_Entity_extraction_workflows.ipynb) | | [Parsing the stars with txtai](https://github.com/neuml/txtai/blob/master/examples/72_Parsing_the_stars_with_txtai.ipynb) | Explore an astronomical knowledge graph of known stars, planets, galaxies | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/72_Parsing_the_stars_with_txtai.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. 
Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name entity: # Run pipeline with workflow workflow: entity: tasks: - action: entity ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("entity", ["Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"entity", "elements": ["Canadas last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg"]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.Entity.__init__ ### ::: txtai.pipeline.Entity.__call__ ================================================ FILE: docs/pipeline/text/labels.md ================================================ # Labels ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Labels pipeline uses a text classification model to apply labels to input text. This pipeline can classify text using either a zero shot model (dynamic labeling) or a standard text classification model (fixed labeling). ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import Labels # Create and run pipeline labels = Labels() labels( ["Great news", "That's rough"], ["positive", "negative"] ) ``` See the link below for a more detailed example. 
| Notebook | Description | | |:----------|:-------------|------:| | [Apply labels with zero shot classification](https://github.com/neuml/txtai/blob/master/examples/07_Apply_labels_with_zero_shot_classification.ipynb) | Use zero shot learning for labeling, classification and topic modeling | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/07_Apply_labels_with_zero_shot_classification.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name labels: # Run pipeline with workflow workflow: labels: tasks: - action: labels args: [["positive", "negative"]] ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("labels", ["Great news", "That's rough"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"labels", "elements": ["Great news", "Thats rough"]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.Labels.__init__ ### ::: txtai.pipeline.Labels.__call__ ================================================ FILE: docs/pipeline/text/reranker.md ================================================ # Reranker ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Reranker pipeline runs embeddings queries and re-ranks them using a similarity pipeline. 
## Example The following shows a simple example using this pipeline. ```python from txtai import Embeddings from txtai.pipeline import Reranker, Similarity # Embeddings instance embeddings = Embeddings() embeddings.load(provider="huggingface-hub", container="neuml/txtai-wikipedia") # Similarity instance similarity = Similarity(path="colbert-ir/colbertv2.0", lateencode=True) # Reranking pipeline reranker = Reranker(embeddings, similarity) reranker("Tell me about AI") ``` _Note: Content must be enabled with the embeddings instance for this to work properly._ See the link below for a more detailed example. | Notebook | Description | | |:----------|:-------------|------:| | [What's new in txtai 9.0](https://github.com/neuml/txtai/blob/master/examples/76_Whats_new_in_txtai_9_0.ipynb) | Learned sparse vectors, late interaction models and rerankers | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/76_Whats_new_in_txtai_9_0.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). 
### config.yml ```yaml embeddings: similarity: # Create pipeline using lower case class name reranker: # Run pipeline with workflow workflow: reranker: tasks: - reranker ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("reranker", ["Tell me about AI"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"reranker", "elements":["Tell me about AI"]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.Reranker.__init__ ### ::: txtai.pipeline.Reranker.__call__ ================================================ FILE: docs/pipeline/text/similarity.md ================================================ # Similarity ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Similarity pipeline computes similarity between queries and a list of text using a text classifier. This pipeline supports both standard text classification models and zero-shot classification models. The pipeline uses the queries as labels for the input text. The results are transposed to get scores per query/label vs scores per input text. Cross-encoder models are supported via the `crossencode=True` constructor parameter. Late interaction (i.e. ColBERT) models are also supported via the `lateencode=True` constructor parameter. CrossEncoder and LateEncoder pipelines back each of these models and can be instantiated directly as well. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import Similarity # Create and run pipeline similarity = Similarity() similarity("feel good story", [ "Maine man wins $1M from $25 lottery ticket", "Don't sacrifice slower friends in a bear attack" ]) ``` See the link below for a more detailed example.
| Notebook | Description | | |:----------|:-------------|------:| | [Add semantic search to Elasticsearch](https://github.com/neuml/txtai/blob/master/examples/04_Add_semantic_search_to_Elasticsearch.ipynb) | Add semantic search to existing search systems | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/04_Add_semantic_search_to_Elasticsearch.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name similarity: ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") app.similarity("feel good story", [ "Maine man wins $1M from $25 lottery ticket", "Don't sacrifice slower friends in a bear attack" ]) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/similarity" \ -H "Content-Type: application/json" \ -d '{"query": "feel good story", "texts": ["Maine man wins $1M from $25 lottery ticket", "Dont sacrifice slower friends in a bear attack"]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.Similarity.__init__ ### ::: txtai.pipeline.Similarity.__call__ ================================================ FILE: docs/pipeline/text/summary.md ================================================ # Summary ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Summary pipeline summarizes text. This pipeline runs a text2text model that abstractively creates a summary of the input text. 
## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import Summary # Create and run pipeline summary = Summary() summary("Enter long, detailed text to summarize here") ``` See the link below for a more detailed example. | Notebook | Description | | |:----------|:-------------|------:| | [Building abstractive text summaries](https://github.com/neuml/txtai/blob/master/examples/09_Building_abstractive_text_summaries.ipynb) | Run abstractive text summarization | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/09_Building_abstractive_text_summaries.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance). ### config.yml ```yaml # Create pipeline using lower case class name summary: # Run pipeline with workflow workflow: summary: tasks: - action: summary ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("summary", ["Enter long, detailed text to summarize here"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"summary", "elements":["Enter long, detailed text to summarize here"]}' ``` ## Methods Python documentation for the pipeline. 
### ::: txtai.pipeline.Summary.__init__ ### ::: txtai.pipeline.Summary.__call__ ================================================ FILE: docs/pipeline/text/translation.md ================================================ # Translation ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) The Translation pipeline translates text between languages. It supports over 100 languages. Automatic source language detection is built-in. This pipeline detects the language of each input text row, loads a model for the source-target combination and translates text to the target language. ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import Translation # Create and run pipeline translate = Translation() translate("This is a test translation into Spanish", "es") ``` See the link below for a more detailed example. | Notebook | Description | | |:----------|:-------------|------:| | [Translate text between languages](https://github.com/neuml/txtai/blob/master/examples/12_Translate_text_between_languages.ipynb) | Streamline machine translation and language detection | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/12_Translate_text_between_languages.ipynb) | ## Configuration-driven example Pipelines are run with Python or configuration. Pipelines can be instantiated in [configuration](../../../api/configuration/#pipeline) using the lower case name of the pipeline. Configuration-driven pipelines are run with [workflows](../../../workflow/#configuration-driven-example) or the [API](../../../api#local-instance).
### config.yml ```yaml # Create pipeline using lower case class name translation: # Run pipeline with workflow workflow: translate: tasks: - action: translation args: ["es"] ``` ### Run with Workflows ```python from txtai import Application # Create and run pipeline with workflow app = Application("config.yml") list(app.workflow("translate", ["This is a test translation into Spanish"])) ``` ### Run with API ```bash CONFIG=config.yml uvicorn "txtai.api:app" & curl \ -X POST "http://localhost:8000/workflow" \ -H "Content-Type: application/json" \ -d '{"name":"translate", "elements":["This is a test translation into Spanish"]}' ``` ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.Translation.__init__ ### ::: txtai.pipeline.Translation.__call__ ================================================ FILE: docs/pipeline/train/hfonnx.md ================================================ # HFOnnx ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) Exports a Hugging Face Transformer model to ONNX. Currently, this works best with classification/pooling/qa models. Work is ongoing for sequence to sequence models (summarization, transcription, translation). ## Example The following shows a simple example using this pipeline. ```python from txtai.pipeline import HFOnnx, Labels # Model path path = "distilbert-base-uncased-finetuned-sst-2-english" # Export model to ONNX onnx = HFOnnx() model = onnx(path, "text-classification", "model.onnx", True) # Run inference and validate labels = Labels((model, path), dynamic=False) labels("I am happy") ``` See the link below for a more detailed example. 
| Notebook | Description | | |:----------|:-------------|------:| | [Export and run models with ONNX](https://github.com/neuml/txtai/blob/master/examples/18_Export_and_run_models_with_ONNX.ipynb) | Export models with ONNX, run natively in JavaScript, Java and Rust | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/18_Export_and_run_models_with_ONNX.ipynb) | ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.HFOnnx.__call__ ================================================ FILE: docs/pipeline/train/mlonnx.md ================================================ # MLOnnx ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) Exports a traditional machine learning model (i.e. scikit-learn) to ONNX. ## Example See the link below for a detailed example. | Notebook | Description | | |:----------|:-------------|------:| | [Export and run other machine learning models](https://github.com/neuml/txtai/blob/master/examples/21_Export_and_run_other_machine_learning_models.ipynb) | Export and run models from scikit-learn, PyTorch and more | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/21_Export_and_run_other_machine_learning_models.ipynb) | ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.MLOnnx.__call__ ================================================ FILE: docs/pipeline/train/trainer.md ================================================ # HFTrainer ![pipeline](../../images/pipeline.png#only-light) ![pipeline](../../images/pipeline-dark.png#only-dark) Trains a new Hugging Face Transformer model using the Trainer framework. ## Example The following shows a simple example using this pipeline. 
```python import pandas as pd from datasets import load_dataset from txtai.pipeline import HFTrainer trainer = HFTrainer() # Pandas DataFrame df = pd.read_csv("training.csv") model, tokenizer = trainer("bert-base-uncased", df) # Hugging Face dataset ds = load_dataset("glue", "sst2") model, tokenizer = trainer("bert-base-uncased", ds["train"], columns=("sentence", "label")) # List of dicts dt = [{"text": "sentence 1", "label": 0}, {"text": "sentence 2", "label": 1}] model, tokenizer = trainer("bert-base-uncased", dt) # Support additional TrainingArguments model, tokenizer = trainer("bert-base-uncased", dt, learning_rate=3e-5, num_train_epochs=5) ``` All [TrainingArguments](https://huggingface.co/transformers/main_classes/trainer.html#transformers.TrainingArguments) are supported as function arguments to the trainer call. See the links below for more detailed examples. | Notebook | Description | | |:----------|:-------------|------:| | [Train a text labeler](https://github.com/neuml/txtai/blob/master/examples/16_Train_a_text_labeler.ipynb) | Build text sequence classification models | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/16_Train_a_text_labeler.ipynb) | | [Train without labels](https://github.com/neuml/txtai/blob/master/examples/17_Train_without_labels.ipynb) | Use zero-shot classifiers to train new models | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/17_Train_without_labels.ipynb) | | [Train a QA model](https://github.com/neuml/txtai/blob/master/examples/19_Train_a_QA_model.ipynb) | Build and fine-tune question-answering models | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/19_Train_a_QA_model.ipynb) | | [Train a language model from
scratch](https://github.com/neuml/txtai/blob/master/examples/41_Train_a_language_model_from_scratch.ipynb) | Build new language models | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/41_Train_a_language_model_from_scratch.ipynb) | ## Training tasks The HFTrainer pipeline builds and/or fine-tunes models for the following training tasks. | Task | Description | |:-----|:------------| | language-generation | Causal language model for text generation (e.g. GPT) | | language-modeling | Masked language model for general tasks (e.g. BERT) | | question-answering | Extractive question-answering model, typically with the SQuAD dataset | | sequence-sequence | Sequence-Sequence model (e.g. T5) | | text-classification | Classify text with a set of labels | | token-detection | ELECTRA-style pre-training with replaced token detection | ## PEFT Parameter-Efficient Fine-Tuning (PEFT) is supported through [Hugging Face's PEFT library](https://github.com/huggingface/peft). Quantization is provided through [bitsandbytes](https://github.com/TimDettmers/bitsandbytes). See the examples below. ```python from txtai.pipeline import HFTrainer trainer = HFTrainer() trainer(..., quantize=True, lora=True) ``` When these parameters are set to True, they use default configuration. This can also be customized. ```python quantize = { "load_in_4bit": True, "bnb_4bit_use_double_quant": True, "bnb_4bit_quant_type": "nf4", "bnb_4bit_compute_dtype": "bfloat16" } lora = { "r": 16, "lora_alpha": 8, "target_modules": "all-linear", "lora_dropout": 0.05, "bias": "none" } trainer(..., quantize=quantize, lora=lora) ``` The parameters also accept `transformers.BitsAndBytesConfig` and `peft.LoraConfig` instances. See the following PEFT documentation links for more information.
- [Quantization](https://huggingface.co/docs/peft/developer_guides/quantization) - [LoRA](https://huggingface.co/docs/peft/developer_guides/lora) ## Merge An important parameter for `language-generation` and `language-modeling` tasks is `merge` or the packing of data into chunks. It supports the following options. - `concat` (default) - text is split into chunks up to maxlength, data can be split across multiple chunks - `pack` - text is split into chunks up to maxlength, data guaranteed to be in same chunk, chunks can be smaller than maxlength - `None` - disables merging Merging helps reduce training time as data can be processed efficiently without padding. `concat` maximizes this as it guarantees each chunk will be up to maxlength size. `pack` is a middle ground where data is combined but records are preserved. For general language modeling tasks like masked language modeling, `concat` is the best choice. For instruction/prompt fine-tuning, `pack` or None are the better choices as it guarantees complex logic is not split across chunks. ## Methods Python documentation for the pipeline. ### ::: txtai.pipeline.HFTrainer.__call__ ================================================ FILE: docs/poweredby.md ================================================ # Powered by txtai The following applications are powered by txtai. 
![apps](https://raw.githubusercontent.com/neuml/txtai/master/apps.jpg) | Application | Description | |:------------ |:-------------| | [rag](https://github.com/neuml/rag) | Retrieval Augmented Generation (RAG) application | | [ncoder](https://github.com/neuml/ncoder) | Open-Source AI coding agent | | [paperai](https://github.com/neuml/paperai) | AI for medical and scientific papers | | [annotateai](https://github.com/neuml/annotateai) | Automatically annotate papers with LLMs | In addition to this list, there are also many other [open-source projects](https://github.com/neuml/txtai/network/dependents), [published research](https://scholar.google.com/scholar?q=txtai&hl=en&as_ylo=2022) and closed proprietary/commercial projects that have built on txtai in production. ================================================ FILE: docs/usecases.md ================================================ # Use Cases The following sections introduce common txtai use cases. A comprehensive set of over 70 [example notebooks and applications](../examples) are also available. ## Semantic Search Build semantic/similarity/vector/neural search applications. ![demo](https://raw.githubusercontent.com/neuml/txtai/master/demo.gif) Traditional search systems use keywords to find data. Semantic search has an understanding of natural language and identifies results that have the same meaning, not necessarily the same keywords. ![search](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/search.png#gh-light-mode-only) ![search](https://raw.githubusercontent.com/neuml/txtai/master/docs/images/search-dark.png#gh-dark-mode-only) Get started with the following examples. 
| Notebook | Description | | |:----------|:-------------|------:| | [Introducing txtai](https://github.com/neuml/txtai/blob/master/examples/01_Introducing_txtai.ipynb) [▶️](https://www.youtube.com/watch?v=SIezMnVdmMs) | Overview of the functionality provided by txtai | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/01_Introducing_txtai.ipynb) | | [Similarity search with images](https://github.com/neuml/txtai/blob/master/examples/13_Similarity_search_with_images.ipynb) | Embed images and text into the same space for search | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/13_Similarity_search_with_images.ipynb) | | [Build a QA database](https://github.com/neuml/txtai/blob/master/examples/34_Build_a_QA_database.ipynb) | Question matching with semantic search | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/34_Build_a_QA_database.ipynb) | | [Semantic Graphs](https://github.com/neuml/txtai/blob/master/examples/38_Introducing_the_Semantic_Graph.ipynb) | Explore topics, data connectivity and run network analysis| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/38_Introducing_the_Semantic_Graph.ipynb) | ## LLM Orchestration Autonomous agents, retrieval augmented generation (RAG), chat with your data, pipelines and workflows that interface with large language models (LLMs). ![llm](images/llm.png) See below to learn more. 
| Notebook | Description | | |:----------|:-------------|------:| | [Prompt templates and task chains](https://github.com/neuml/txtai/blob/master/examples/44_Prompt_templates_and_task_chains.ipynb) | Build model prompts and connect tasks together with workflows | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/44_Prompt_templates_and_task_chains.ipynb) | | [Integrate LLM frameworks](https://github.com/neuml/txtai/blob/master/examples/53_Integrate_LLM_Frameworks.ipynb) | Integrate llama.cpp, LiteLLM and custom generation frameworks | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/53_Integrate_LLM_Frameworks.ipynb) | | [Build knowledge graphs with LLMs](https://github.com/neuml/txtai/blob/master/examples/57_Build_knowledge_graphs_with_LLM_driven_entity_extraction.ipynb) | Build knowledge graphs with LLM-driven entity extraction | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/57_Build_knowledge_graphs_with_LLM_driven_entity_extraction.ipynb) | ### Agents Agents connect embeddings, pipelines, workflows and other agents together to autonomously solve complex problems. ![agent](images/agent.png) txtai agents are built on top of the [smolagents](https://github.com/huggingface/smolagents) framework. This supports all LLMs txtai supports (Hugging Face, llama.cpp, OpenAI / Claude / AWS Bedrock via LiteLLM). Agent prompting with [`agents.md`](https://github.com/agentsmd/agents.md) and [`skill.md`](https://agentskills.io/specification) are also supported. Check out this [Agent Quickstart Example](https://github.com/neuml/txtai/blob/master/examples/agent_quickstart.py). Additional examples are listed below. 
| Notebook | Description | | |:----------|:-------------|------:| | [Granting autonomy to agents](https://github.com/neuml/txtai/blob/master/examples/69_Granting_autonomy_to_agents.ipynb) | Agents that iteratively solve problems as they see fit | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/69_Granting_autonomy_to_agents.ipynb) | | [TxtAI got skills](https://github.com/neuml/txtai/blob/master/examples/83_TxtAI_got_skills.ipynb) | Integrate skill.md files with your agent | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/83_TxtAI_got_skills.ipynb) | | [Agent Tools](https://github.com/neuml/txtai/blob/master/examples/84_Agent_Tools.ipynb) [▶️](https://www.youtube.com/watch?v=RDNaFXQy3GQ) | Learn about the txtai agent toolkit | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/84_Agent_Tools.ipynb) | | [Analyzing LinkedIn Company Posts with Graphs and Agents](https://github.com/neuml/txtai/blob/master/examples/71_Analyzing_LinkedIn_Company_Posts_with_Graphs_and_Agents.ipynb) | Exploring how to improve social media engagement with AI | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/71_Analyzing_LinkedIn_Company_Posts_with_Graphs_and_Agents.ipynb) | ### Retrieval augmented generation Retrieval augmented generation (RAG) reduces the risk of LLM hallucinations by constraining the output with a knowledge base as context. RAG is commonly used to "chat with your data". ![rag](images/rag.png#gh-light-mode-only) ![rag](images/rag-dark.png#gh-dark-mode-only) Check out this [RAG Quickstart Example](https://github.com/neuml/txtai/blob/master/examples/rag_quickstart.py). 
Additional examples are listed below. | Notebook | Description | | |:----------|:-------------|------:| | [Build RAG pipelines with txtai](https://github.com/neuml/txtai/blob/master/examples/52_Build_RAG_pipelines_with_txtai.ipynb) [▶️](https://www.youtube.com/watch?v=t_OeAc8NVfQ) | Guide on retrieval augmented generation including how to create citations | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/52_Build_RAG_pipelines_with_txtai.ipynb) | | [RAG is more than Vector Search](https://github.com/neuml/txtai/blob/master/examples/79_RAG_is_more_than_Vector_Search.ipynb) | Context retrieval via Web, SQL and other sources | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/79_RAG_is_more_than_Vector_Search.ipynb) | | [GraphRAG with Wikipedia and GPT OSS](https://github.com/neuml/txtai/blob/master/examples/77_GraphRAG_with_Wikipedia_and_GPT_OSS.ipynb) | Deep graph search powered RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/77_GraphRAG_with_Wikipedia_and_GPT_OSS.ipynb) | | [Speech to Speech RAG](https://github.com/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) [▶️](https://www.youtube.com/watch?v=tH8QWwkVMKA) | Full cycle speech to speech workflow with RAG | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/65_Speech_to_Speech_RAG.ipynb) | ## Language Model Workflows Language model workflows, also known as semantic workflows, connect language models together to build intelligent applications. 
![flows](images/flows.png#gh-light-mode-only) ![flows](images/flows-dark.png#gh-dark-mode-only) While LLMs are powerful, there are plenty of smaller, more specialized models that work better and faster for specific tasks. This includes models for extractive question-answering, automatic summarization, text-to-speech, transcription and translation. Check out this [Workflow Quickstart Example](https://github.com/neuml/txtai/blob/master/examples/workflow_quickstart.py). Additional examples are listed below. | Notebook | Description | | |:----------|:-------------|------:| | [Run pipeline workflows](https://github.com/neuml/txtai/blob/master/examples/14_Run_pipeline_workflows.ipynb) [▶️](https://www.youtube.com/watch?v=UBMPDCn1gEU) | Simple yet powerful constructs to efficiently process data | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/14_Run_pipeline_workflows.ipynb) | | [Building abstractive text summaries](https://github.com/neuml/txtai/blob/master/examples/09_Building_abstractive_text_summaries.ipynb) | Run abstractive text summarization | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/09_Building_abstractive_text_summaries.ipynb) | | [Transcribe audio to text](https://github.com/neuml/txtai/blob/master/examples/11_Transcribe_audio_to_text.ipynb) | Convert audio files to text | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/11_Transcribe_audio_to_text.ipynb) | | [Translate text between languages](https://github.com/neuml/txtai/blob/master/examples/12_Translate_text_between_languages.ipynb) | Streamline machine translation and language detection | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/12_Translate_text_between_languages.ipynb) | ================================================ FILE: docs/why.md ================================================ # Why txtai? ![why](images/why.png#only-light) ![why](images/why-dark.png#only-dark) New vector databases, LLM frameworks and everything in between are sprouting up daily. Why build with txtai? - Up and running in minutes with [pip](../install/) or [Docker](../cloud/) ```python # Get started in a couple lines import txtai embeddings = txtai.Embeddings() embeddings.index(["Correct", "Not what we hoped"]) embeddings.search("positive", 1) #[(0, 0.29862046241760254)] ``` - Built-in API makes it easy to develop applications using your programming language of choice ```yaml # app.yml embeddings: path: sentence-transformers/all-MiniLM-L6-v2 ``` ```bash CONFIG=app.yml uvicorn "txtai.api:app" curl -X GET "http://localhost:8000/search?query=positive" ``` - Run local - no need to ship data off to disparate remote services - Work with micromodels all the way up to large language models (LLMs) - Low footprint - install additional dependencies and scale up when needed - [Learn by example](../examples) - notebooks cover all available functionality ================================================ FILE: docs/workflow/index.md ================================================ # Workflow ![workflow](../images/workflow.png#only-light) ![workflow](../images/workflow-dark.png#only-dark) Workflows are a simple yet powerful construct that takes a callable and returns elements. Workflows operate well with pipelines but can work with any callable object. Workflows are streaming and work on data in batches, allowing large volumes of data to be processed efficiently. Given that pipelines are callable objects, workflows enable efficient processing of pipeline data. 
Large language models typically work with smaller batches of data, workflows are well suited to feed a series of transformers pipelines. An example of the most basic workflow: ```python workflow = Workflow([Task(lambda x: [y * 2 for y in x])]) list(workflow([1, 2, 3])) ``` This example multiplies each input value by 2 and returns transformed elements via a generator. Since workflows run as generators, output must be consumed for execution to occur. The following snippets show how output can be consumed. ```python # Small dataset where output fits in memory list(workflow(elements)) # Large dataset for output in workflow(elements): function(output) # Large dataset where output is discarded for _ in workflow(elements): pass ``` Workflows are run with Python or configuration. Examples of both methods are shown below. ## Example A full-featured example is shown below in Python. This workflow transcribes a set of audio files, translates the text into French and indexes the data. ```python from txtai import Embeddings from txtai.pipeline import Transcription, Translation from txtai.workflow import FileTask, Task, Workflow # Embeddings instance embeddings = Embeddings({ "path": "sentence-transformers/paraphrase-MiniLM-L3-v2", "content": True }) # Transcription instance transcribe = Transcription() # Translation instance translate = Translation() tasks = [ FileTask(transcribe, r"\.wav$"), Task(lambda x: translate(x, "fr")) ] # List of files to process data = [ "US_tops_5_million.wav", "Canadas_last_fully.wav", "Beijing_mobilises.wav", "The_National_Park.wav", "Maine_man_wins_1_mil.wav", "Make_huge_profits.wav" ] # Workflow that translate text to French workflow = Workflow(tasks) # Index data embeddings.index((uid, text, None) for uid, text in enumerate(workflow(data))) # Search embeddings.search("wildlife", 1) ``` ## Configuration-driven example Workflows can also be defined with YAML configuration. 
```yaml writable: true embeddings: path: sentence-transformers/paraphrase-MiniLM-L3-v2 content: true # Transcribe audio to text transcription: # Translate text between languages translation: workflow: index: tasks: - action: transcription select: "\\.wav$" task: file - action: translation args: ["fr"] - action: index ``` ```python # Create and run the workflow from txtai import Application # Create and run the workflow app = Application("workflow.yml") list(app.workflow("index", [ "US_tops_5_million.wav", "Canadas_last_fully.wav", "Beijing_mobilises.wav", "The_National_Park.wav", "Maine_man_wins_1_mil.wav", "Make_huge_profits.wav" ])) # Search app.search("wildlife") ``` The code above executes a workflow defined in the file `workflow.yml`. ## LLM workflow example Workflows can connect multiple LLM prompting tasks together. ```yaml llm: path: openai/gpt-oss-20b workflow: llm: tasks: - task: template template: | Extract keywords for the following text. {text} action: llm - task: template template: | Translate the following text into French. {text} action: llm ``` ```python from txtai import Application app = Application("workflow.yml") list(app.workflow("llm", [ """ txtai is an open-source platform for semantic search and workflows powered by language models. """ ])) ``` Any txtai pipeline/workflow task can be connected in workflows with LLMs. ```yaml llm: path: openai/gpt-oss-20b translation: workflow: llm: tasks: - task: template template: | Extract keywords for the following text. {text} action: llm - action: translation args: - fr ``` See the following links for more information. - [Workflow Demo](https://huggingface.co/spaces/NeuML/txtai) - [Workflow YAML Examples](https://huggingface.co/spaces/NeuML/txtai/tree/main/workflows) - [Workflow YAML Guide](../api/configuration/#workflow) ## Methods Workflows are callable objects. Workflows take an input of iterable data elements and output iterable data elements.
### ::: txtai.workflow.Workflow.__init__ ### ::: txtai.workflow.Workflow.__call__ ### ::: txtai.workflow.Workflow.schedule ## More examples Check out this [Workflow Quickstart Example](https://github.com/neuml/txtai/blob/master/examples/workflow_quickstart.py). See [this link](../examples/#workflows) for a full list of workflow examples. ================================================ FILE: docs/workflow/schedule.md ================================================ # Schedule ![schedule](../images/schedule.png#only-light) ![schedule](../images/schedule-dark.png#only-dark) Workflows can run on a repeating basis with schedules. This is suitable in cases where a workflow is run against a dynamically expanding input, like an API service or directory of files. The schedule method takes a cron expression, list of static elements (which dynamically expand i.e. API service, directory listing) and an optional maximum number of iterations. Below are a couple example cron expressions. ```bash # ┌─────────────── minute (0 - 59) # | ┌───────────── hour (0 - 23) # | | ┌─────────── day of the month (1 - 31) # | | | ┌───────── month (1 - 12) # | | | | ┌─────── day of the week (0 - 6) # | | | | | ┌───── second (0 - 59) # | | | | | | * * * * * * # Run every second 0/5 * * * * # Run every 5 minutes 0 0 1 * * # Run monthly on 1st 0 0 1 1 * # Run on Jan 1 at 12am 0 0 * * mon,wed # Run Monday and Wednesday ``` ## Python Simple workflow [scheduled](../#txtai.workflow.base.Workflow.schedule) with Python. ```python workflow = Workflow(tasks) workflow.schedule("0/5 * * * *", elements) ``` See the link below for a more detailed example. 
| Notebook | Description | | |:----------|:-------------|------:| | [Workflow Scheduling](https://github.com/neuml/txtai/blob/master/examples/27_Workflow_scheduling.ipynb) | Schedule workflows with cron expressions | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/27_Workflow_scheduling.ipynb) | ## Configuration Simple workflow scheduled with configuration. ```yaml workflow: index: schedule: cron: 0/5 * * * * elements: [...] tasks: [...] ``` ```python # Create and run the workflow from txtai import Application # Create and run the workflow app = Application("workflow.yml") # Wait for scheduled workflows app.wait() ``` See the links below for more information on cron expressions. - [cron overview](https://en.wikipedia.org/wiki/Cron) - [croniter - library used by txtai](https://github.com/kiorky/croniter) ================================================ FILE: docs/workflow/task/console.md ================================================ # Console Task ![task](../../images/task.png#only-light) ![task](../../images/task-dark.png#only-dark) The Console Task prints task inputs and outputs to standard output. This task is mainly used for debugging and can be added at any point in a workflow. ## Example The following shows a simple example using this task as part of a workflow. ```python from txtai.workflow import ConsoleTask, Workflow workflow = Workflow([ConsoleTask()]) workflow(["Input 1", "Input2"]) ``` ## Configuration-driven example This task can also be created with workflow configuration. ```yaml workflow: tasks: - task: console ``` ## Methods Python documentation for the task.
### ::: txtai.workflow.ConsoleTask.__init__ ================================================ FILE: docs/workflow/task/export.md ================================================ # Export Task ![task](../../images/task.png#only-light) ![task](../../images/task-dark.png#only-dark) The Export Task exports task outputs to CSV or Excel. ## Example The following shows a simple example using this task as part of a workflow. ```python from txtai.workflow import ExportTask, Workflow workflow = Workflow([ExportTask()]) workflow(["Input 1", "Input2"]) ``` ## Configuration-driven example This task can also be created with workflow configuration. ```yaml workflow: tasks: - task: export ``` ## Methods Python documentation for the task. ### ::: txtai.workflow.ExportTask.__init__ ### ::: txtai.workflow.ExportTask.register ================================================ FILE: docs/workflow/task/file.md ================================================ # File Task ![task](../../images/task.png#only-light) ![task](../../images/task-dark.png#only-dark) The File Task validates a file exists. It handles both file paths and local file urls. Note that this task _only_ works with local files. ## Example The following shows a simple example using this task as part of a workflow. ```python from txtai.workflow import FileTask, Workflow workflow = Workflow([FileTask()]) workflow(["/path/to/file", "file:///path/to/file"]) ``` ## Configuration-driven example This task can also be created with workflow configuration. ```yaml workflow: tasks: - task: file ``` ## Methods Python documentation for the task. ### ::: txtai.workflow.FileTask.__init__ ================================================ FILE: docs/workflow/task/image.md ================================================ # Image Task ![task](../../images/task.png#only-light) ![task](../../images/task-dark.png#only-dark) The Image Task reads file paths, checks the file is an image and opens it as an Image object.
Note that this task _only_ works with local files. ## Example The following shows a simple example using this task as part of a workflow. ```python from txtai.workflow import ImageTask, Workflow workflow = Workflow([ImageTask()]) workflow(["image.jpg", "image.gif"]) ``` ## Configuration-driven example This task can also be created with workflow configuration. ```yaml workflow: tasks: - task: image ``` ## Methods Python documentation for the task. ### ::: txtai.workflow.ImageTask.__init__ ================================================ FILE: docs/workflow/task/index.md ================================================ # Tasks ![task](../../images/task.png#only-light) ![task](../../images/task-dark.png#only-dark) Workflows execute tasks. Tasks are callable objects with a number of parameters to control the processing of data at a given step. While similar to pipelines, tasks encapsulate processing and don't perform significant transformations on their own. Tasks perform logic to prepare content for the underlying action(s). A simple task is shown below. ```python Task(lambda x: [y * 2 for y in x]) ``` The task above executes the function above for all input elements. Tasks work well with pipelines, since pipelines are callable objects. The example below will summarize each input element. ```python summary = Summary() Task(summary) ``` Tasks can operate independently but work best with workflows, as workflows add large-scale stream processing. ```python summary = Summary() task = Task(summary) task(["Very long text here"]) workflow = Workflow([task]) list(workflow(["Very long text here"])) ``` Tasks can also be created with configuration as part of a workflow. ```yaml workflow: tasks: - action: summary ``` ::: txtai.workflow.Task.__init__ ## Multi-action task concurrency The default processing mode is to run actions sequentially. Multiprocessing support is already built in at a number of levels.
Any of the GPU models will maximize GPU utilization for example and even in CPU mode, concurrency is utilized. But there are still use cases for task action concurrency. For example, if the system has multiple GPUs, the task runs external sequential code, or the task has a large number of I/O tasks. In addition to sequential processing, multi-action tasks can run either multithreaded or with multiple processes. The advantages of each approach are discussed below. - *multithreading* - no overhead of creating separate processes or pickling data. But Python can only execute a single thread due to the GIL, so this approach won't help with CPU bound actions. This method works well with I/O bound actions and GPU actions. - *multiprocessing* - separate subprocesses are created and data is exchanged via pickling. This method can fully utilize all CPU cores since each process runs independently. This method works well with CPU bound actions. More information on multiprocessing can be found in the [Python documentation](https://docs.python.org/3/library/multiprocessing.html). ## Multi-action task merges Multi-action tasks will generate parallel outputs for the input data. The task output can be merged together in a couple different ways. ### ::: txtai.workflow.Task.hstack ### ::: txtai.workflow.Task.vstack ### ::: txtai.workflow.Task.concat ## Extract task output columns With column-wise merging, each output row will be a tuple of output values for each task action. This can be fed as input to a downstream task and that task can have separate tasks work with each element. A simple example: ```python workflow = Workflow([Task(lambda x: [y * 3 for y in x], unpack=False, column=0)]) list(workflow([(2, 8)])) ``` For the example input tuple of (2, 8), the workflow will only select the first element (2) and run the task against that element.
```python workflow = Workflow([Task([lambda x: [y * 3 for y in x], lambda x: [y - 1 for y in x]], unpack=False, column={0:0, 1:1})]) list(workflow([(2, 8)])) ``` The example above applies a separate action to each input column. This simple construct can help build extremely powerful workflow graphs! ================================================ FILE: docs/workflow/task/retrieve.md ================================================ # Retrieve Task ![task](../../images/task.png#only-light) ![task](../../images/task-dark.png#only-dark) The Retrieve Task connects to a url and downloads the content locally. This task is helpful when working with actions that require data to be available locally. ## Example The following shows a simple example using this task as part of a workflow. ```python from txtai.workflow import RetrieveTask, Workflow workflow = Workflow([RetrieveTask(directory="/tmp")]) workflow(["https://file.to.download", "/local/file/to/copy"]) ``` ## Configuration-driven example This task can also be created with workflow configuration. ```yaml workflow: tasks: - task: retrieve directory: /tmp ``` ## Methods Python documentation for the task. ### ::: txtai.workflow.RetrieveTask.__init__ ### ::: txtai.workflow.RetrieveTask.register ================================================ FILE: docs/workflow/task/service.md ================================================ # Service Task ![task](../../images/task.png#only-light) ![task](../../images/task-dark.png#only-dark) The Service Task extracts content from a http service. ## Example The following shows a simple example using this task as part of a workflow. ```python from txtai.workflow import ServiceTask, Workflow workflow = Workflow([ServiceTask(url="https://service.url/action")]) workflow(["parameter"]) ``` ## Configuration-driven example This task can also be created with workflow configuration.
```yaml workflow: tasks: - task: service url: https://service.url/action ``` ## Methods Python documentation for the task. ### ::: txtai.workflow.ServiceTask.__init__ ### ::: txtai.workflow.ServiceTask.register ================================================ FILE: docs/workflow/task/storage.md ================================================ # Storage Task ![task](../../images/task.png#only-light) ![task](../../images/task-dark.png#only-dark) The Storage Task expands a local directory or cloud storage bucket into a list of URLs to process. ## Example The following shows a simple example using this task as part of a workflow. ```python from txtai.workflow import StorageTask, Workflow workflow = Workflow([StorageTask()]) workflow(["s3://path/to/bucket", "local://local/directory"]) ``` ## Configuration-driven example This task can also be created with workflow configuration. ```yaml workflow: tasks: - task: storage ``` ## Methods Python documentation for the task. ### ::: txtai.workflow.StorageTask.__init__ ================================================ FILE: docs/workflow/task/template.md ================================================ # Template Task ![task](../../images/task.png#only-light) ![task](../../images/task-dark.png#only-dark) The Template Task generates text from a template and task inputs. Templates can be used to prepare data for a number of tasks including generating large language model (LLM) prompts. ## Example The following shows a simple example using this task as part of a workflow. ```python from txtai.workflow import TemplateTask, Workflow workflow = Workflow([TemplateTask(template="This is a {text} task")]) workflow([{"text": "template"}]) ``` ## Configuration-driven example This task can also be created with workflow configuration. ```yaml workflow: tasks: - task: template template: This is a {text} task ``` ## Methods Python documentation for the task. 
### ::: txtai.workflow.TemplateTask.__init__ ================================================ FILE: docs/workflow/task/url.md ================================================ # Url Task ![task](../../images/task.png#only-light) ![task](../../images/task-dark.png#only-dark) The Url Task validates that inputs start with a url prefix. ## Example The following shows a simple example using this task as part of a workflow. ```python from txtai.workflow import UrlTask, Workflow workflow = Workflow([UrlTask()]) workflow(["https://file.to.download", "file:////local/file/to/copy"]) ``` ## Configuration-driven example This task can also be created with workflow configuration. ```yaml workflow: tasks: - task: url ``` ## Methods Python documentation for the task. ### ::: txtai.workflow.UrlTask.__init__ ================================================ FILE: docs/workflow/task/workflow.md ================================================ # Workflow Task ![task](../../images/task.png#only-light) ![task](../../images/task-dark.png#only-dark) The Workflow Task runs a workflow. Allows creating workflows of workflows. ## Example The following shows a simple example using this task as part of a workflow. ```python from txtai.workflow import WorkflowTask, Workflow workflow = Workflow([WorkflowTask(otherworkflow)]) workflow(["input data"]) ``` ## Methods Python documentation for the task. 
### ::: txtai.workflow.WorkflowTask.__init__ ================================================ FILE: examples/01_Introducing_txtai.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "POWZoSJR6XzK" }, "source": [ "# Introducing txtai\n", "\n", "[txtai](https://github.com/neuml/txtai) is an all-in-one AI framework for semantic search, LLM orchestration and language model workflows.\n", "\n", "The key component of txtai is an embeddings database, which is a union of vector indexes (sparse and dense), graph networks and relational databases.\n", "\n", "This foundation enables vector search and/or serves as a powerful knowledge source for large language model (LLM) applications.\n", "\n", "Build autonomous agents, retrieval augmented generation (RAG) processes, multi-model workflows and more.\n", "\n", "The following is a summary of key features:\n", "\n", "- 🔎 Vector search with SQL, object storage, topic modeling, graph analysis and multimodal indexing\n", "- 📄 Create embeddings for text, documents, audio, images and video\n", "- 💡 Pipelines powered by language models that run LLM prompts, question-answering, labeling, transcription, translation, summarization and more\n", "- ↪️️ Workflows to join pipelines together and aggregate business logic. txtai processes can be simple microservices or multi-model workflows.\n", "- 🤖 Agents that intelligently connect embeddings, pipelines, workflows and other agents together to autonomously solve complex problems\n", "- ⚙️ Web and Model Context Protocol (MCP) APIs. 
Bindings available for [JavaScript](https://github.com/neuml/txtai.js), [Java](https://github.com/neuml/txtai.java), [Rust](https://github.com/neuml/txtai.rs) and [Go](https://github.com/neuml/txtai.go).\n", "- 🔋 Batteries included with defaults to get up and running fast\n", "- ☁️ Run local or scale out with container orchestration\n", "\n", "txtai is built with Python 3.10+, [Hugging Face Transformers](https://github.com/huggingface/transformers), [Sentence Transformers](https://github.com/UKPLab/sentence-transformers) and [FastAPI](https://github.com/tiangolo/fastapi). txtai is open-source under an Apache 2.0 license.\n", "\n", "> [NeuML](https://neuml.com) is the company behind txtai and we provide AI consulting services around our stack. [Schedule a meeting](https://cal.com/neuml/intro) or [send a message](mailto:info@neuml.com) to learn more.\n", ">\n", "> We're also building an easy and secure way to run hosted txtai applications with [txtai.cloud](https://txtai.cloud).\n" ] }, { "cell_type": "markdown", "metadata": { "id": "qa_PPKVX6XzN" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "_kg_hide-output": true, "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "id": "24q-1n5i6XzQ", "trusted": true }, "outputs": [], "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[graph]\n", "\n", "# Install translation pipeline dependencies for later examples\n", "!pip install sentencepiece sacremoses staticvectors" ] }, { "cell_type": "markdown", "metadata": { "id": "DLIjSzbq6Xzx" }, "source": [ "# Semantic search\n", "\n", "Embeddings databases are the engine that delivers semantic search. Data is transformed into embeddings vectors where similar concepts will produce similar vectors. Indexes both large and small are built with these vectors. 
The indexes are used to find results that have the same meaning, not necessarily the same keywords.\n", "\n", "The basic use case for an embeddings database is building an approximate nearest neighbor (ANN) index for semantic search. The following example indexes a small number of text entries to demonstrate the value of semantic search.\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "QxX9EtIc6Xzg", "trusted": true }, "outputs": [], "source": [ "%%capture\n", "\n", "from txtai import Embeddings\n", "\n", "# Works with a list, dataset or generator\n", "data = [\n", " \"US tops 5 million confirmed virus cases\",\n", " \"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\",\n", " \"Beijing mobilises invasion craft along coast as Taiwan tensions escalate\",\n", " \"The National Park Service warns against sacrificing slower friends in a bear attack\",\n", " \"Maine man wins $1M from $25 lottery ticket\",\n", " \"Make huge profits without work, earn up to $100,000 a day\"\n", "]\n", "\n", "# Create an embeddings\n", "embeddings = Embeddings(path=\"sentence-transformers/nli-mpnet-base-v2\")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "cXfZtdHD6Xzy", "outputId": "369b637e-1e1c-4229-f68e-92917be5fbd0", "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Query Best Match\n", "--------------------------------------------------\n", "feel good story Maine man wins $1M from $25 lottery ticket\n", "climate change Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\n", "public health story US tops 5 million confirmed virus cases\n", "war Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n", "wildlife The National Park Service warns against sacrificing slower friends in a bear attack\n", "asia Beijing mobilises invasion craft along 
coast as Taiwan tensions escalate\n", "lucky Maine man wins $1M from $25 lottery ticket\n", "dishonest junk Make huge profits without work, earn up to $100,000 a day\n" ] } ], "source": [ "# Create an index for the list of text\n", "embeddings.index(data)\n", "\n", "print(\"%-20s %s\" % (\"Query\", \"Best Match\"))\n", "print(\"-\" * 50)\n", "\n", "# Run an embeddings search for each query\n", "for query in (\"feel good story\", \"climate change\", \"public health story\", \"war\", \"wildlife\", \"asia\", \"lucky\", \"dishonest junk\"):\n", " # Extract uid of first result\n", " # search result format: (uid, score)\n", " uid = embeddings.search(query, 1)[0][0]\n", "\n", " # Print text\n", " print(\"%-20s %s\" % (query, data[uid]))" ] }, { "cell_type": "markdown", "metadata": { "id": "kIMbLW0t6Xzw" }, "source": [ "The example above shows that for all of the queries, the query text isn’t in the data. This is the true power of transformers models over token-based search. What you get out of the box is 🔥🔥🔥!" ] }, { "cell_type": "markdown", "metadata": { "id": "6m7sYUj_AdOL" }, "source": [ "# Updates and deletes\n", "\n", "Updates and deletes are supported for embeddings. The upsert operation will insert new data and update existing data.\n", "\n", "The following section runs a query, then updates a value changing the top result and finally deletes the updated value to revert back to the original query results." 
] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2CERR0U2Ac8C", "outputId": "0c1f4dd2-1319-410b-91a4-7753adba2c26" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Initial: Maine man wins $1M from $25 lottery ticket\n", "After update: See it: baby panda born\n", "After delete: Maine man wins $1M from $25 lottery ticket\n" ] } ], "source": [ "# Run initial query\n", "uid = embeddings.search(\"feel good story\", 1)[0][0]\n", "print(\"Initial: \", data[uid])\n", "\n", "# Create a copy of data to modify\n", "udata = data.copy()\n", "\n", "# Update data\n", "udata[0] = \"See it: baby panda born\"\n", "embeddings.upsert([(0, udata[0], None)])\n", "\n", "uid = embeddings.search(\"feel good story\", 1)[0][0]\n", "print(\"After update: \", udata[uid])\n", "\n", "# Remove record just added from index\n", "embeddings.delete([0])\n", "\n", "# Ensure value matches previous value\n", "uid = embeddings.search(\"feel good story\", 1)[0][0]\n", "print(\"After delete: \", udata[uid])" ] }, { "cell_type": "markdown", "metadata": { "id": "6TCVl6QA6Xz5" }, "source": [ "# Persistence\n", "\n", "Embeddings can be saved to storage and reloaded." 
] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5gyO90Hc6Xz7", "outputId": "5460fcd8-5b9f-4064-9ac3-f72e5db9ecf4", "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\n" ] } ], "source": [ "embeddings.save(\"index\")\n", "\n", "embeddings = Embeddings()\n", "embeddings.load(\"index\")\n", "\n", "uid = embeddings.search(\"climate change\", 1)[0][0]\n", "print(data[uid])" ] }, { "cell_type": "markdown", "metadata": { "id": "giNZ_fHmqT8u" }, "source": [ "# Hybrid search\n", "\n", "While dense vector indexes are by far the best option for semantic search systems, sparse keyword indexes can still add value. There may be cases where finding an exact match is important.\n", "\n", "Hybrid search combines the results from sparse and dense vector indexes for the best of both worlds." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lclxiRFRqsFv", "outputId": "3bd15b63-3bf4-4132-a819-0f560fce3f92" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Query Best Match\n", "--------------------------------------------------\n", "feel good story Maine man wins $1M from $25 lottery ticket\n", "climate change Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\n", "public health story US tops 5 million confirmed virus cases\n", "war Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n", "wildlife The National Park Service warns against sacrificing slower friends in a bear attack\n", "asia Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n", "lucky Maine man wins $1M from $25 lottery ticket\n", "dishonest junk Make huge profits without work, earn up to $100,000 a day\n" ] } ], "source": [ "# Create an 
embeddings\n", "embeddings = Embeddings(hybrid=True, path=\"sentence-transformers/nli-mpnet-base-v2\")\n", "\n", "# Create an index for the list of text\n", "embeddings.index(data)\n", "\n", "print(\"%-20s %s\" % (\"Query\", \"Best Match\"))\n", "print(\"-\" * 50)\n", "\n", "# Run an embeddings search for each query\n", "for query in (\"feel good story\", \"climate change\", \"public health story\", \"war\", \"wildlife\", \"asia\", \"lucky\", \"dishonest junk\"):\n", " # Extract uid of first result\n", " # search result format: (uid, score)\n", " uid = embeddings.search(query, 1)[0][0]\n", "\n", " # Print text\n", " print(\"%-20s %s\" % (query, data[uid]))" ] }, { "cell_type": "markdown", "metadata": { "id": "d9beQSw-vhz8" }, "source": [ "Same results as with semantic search. Let's run the same example with just a keyword index to view those results." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "WykNb8y3vohL", "outputId": "5617e912-1014-495c-9dc9-e5729988d77f" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[]\n", "[(4, 0.5234998733628726)]\n" ] } ], "source": [ "# Create an embeddings\n", "embeddings = Embeddings(keyword=True)\n", "\n", "# Create an index for the list of text\n", "embeddings.index(data)\n", "\n", "print(embeddings.search(\"feel good story\"))\n", "print(embeddings.search(\"lottery\"))" ] }, { "cell_type": "markdown", "metadata": { "id": "P0FLRsrmv2hB" }, "source": [ "See that when the embeddings instance only uses a keyword index, it can't find semantic matches, only keyword matches." ] }, { "cell_type": "markdown", "metadata": { "id": "0p3WCDniUths" }, "source": [ "# Content storage\n", "\n", "Up to this point, all the examples are referencing the original data array to retrieve the input text. This works fine for a demo but what if you have millions of documents? 
In this case, the text needs to be retrieved from an external datastore using the id.\n", "\n", "Content storage adds an associated database (e.g. SQLite, DuckDB) that stores associated metadata with the vector index. The document text, additional metadata and additional objects can be stored and retrieved right alongside the indexed vectors." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "MOntBQIdVv-J", "outputId": "c9d0d3e7-d7b4-4421-f63d-402db6918cca" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Maine man wins $1M from $25 lottery ticket\n" ] } ], "source": [ "# Create embeddings with content enabled. The default behavior is to only store indexed vectors.\n", "embeddings = Embeddings(path=\"sentence-transformers/nli-mpnet-base-v2\", content=True, objects=True)\n", "\n", "# Create an index for the list of text\n", "embeddings.index(data)\n", "\n", "print(embeddings.search(\"feel good story\", 1)[0][\"text\"])" ] }, { "cell_type": "markdown", "metadata": { "id": "hHGvhZm-ZTzL" }, "source": [ "The only change above is setting the *content* flag to True. This enables storing text and metadata content (if provided) alongside the index. Note how the text is pulled right from the query result!\n", "\n", "Let's add some metadata." ] }, { "cell_type": "markdown", "metadata": { "id": "BYWUFBUGyKyY" }, "source": [ "# Query with SQL\n", "\n", "When content is enabled, the entire dictionary is stored and can be queried. In addition to vector queries, txtai accepts SQL queries. This enables combined queries using both a vector index and content stored in a database backend." 
] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "aPH-dnV2ZuL1", "outputId": "c563060c-d292-4b19-aa64-f4c629008cdb" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'text': 'The National Park Service warns against sacrificing slower friends in a bear attack', 'score': 0.3151372969150543}]\n", "[{'text': 'Maine man wins $1M from $25 lottery ticket', 'length': 42, 'score': 0.08329025655984879}]\n", "[{'count(*)': 6, 'min(length)': 39, 'max(length)': 94, 'sum(length)': 387}]\n" ] } ], "source": [ "# Create an index for the list of text\n", "embeddings.index([{\"text\": text, \"length\": len(text)} for text in data])\n", "\n", "# Filter by score\n", "print(embeddings.search(\"select text, score from txtai where similar('hiking danger') and score >= 0.15\"))\n", "\n", "# Filter by metadata field 'length'\n", "print(embeddings.search(\"select text, length, score from txtai where similar('feel good story') and score >= 0.05 and length >= 40\"))\n", "\n", "# Run aggregate queries\n", "print(embeddings.search(\"select count(*), min(length), max(length), sum(length) from txtai\"))" ] }, { "cell_type": "markdown", "metadata": { "id": "oH4Yd9BOlo5u" }, "source": [ "The example above adds a simple additional field, text length.\n", "\n", "Note the second query is filtering on the metadata field length along with a `similar` query clause. This gives a great blend of vector search with traditional filtering to help identify the best results." ] }, { "cell_type": "markdown", "metadata": { "id": "lGmiYXyqyjtQ" }, "source": [ "# Object storage\n", "\n", "In addition to metadata, binary content can also be associated with documents. The example below downloads an image and upserts it along with associated text into the embeddings index." 
] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 307 }, "id": "Ef4-Gd8ZtzUF", "outputId": "aaa811e8-ee3a-43ed-dab3-994ca6014a64" }, "outputs": [ { "data": { "image/gif": "R0lGODlhlwQ4AvUAABITFMzMzMTExKurrGtsbCYnKJOTlIeIiHN0dLOzs0pLTJeYmHt7fFdXWLy8vJubm6SkpBobHBUWGBcZGBAqHyUdKPRDNmglIcw7MOxBNYcsJqczKts+Mrg2LeqA/Mxx3HREfaVdsth36Y1RmLFjvuN89bxpyw80IxIbGgOp9AaQzgtdgwSg5wlwnwd+tADmdgl8RQS2YAHccgeVUAPEZgWsWwLLaf/rO2pjItzLNe3bOIV8J7CjLsi4MryuMAAAACH/C05FVFNDQVBFMi4wAwEAAAAh+QQEFAD/ACwAAAAAlwQ4AgAF/yAgjmRpnmiqrmzrvnAsz3Rt33iu73zv/8CgcEgsGo/IpHLJbDqf0Kh0Sq1ar9isdsvter/gsHhMLpvP6LR6zW673/C4fE6v2+/4vH7P7/v/gIGCg4SFhoeIiYqLjI2Oj5CRkpOUlZaXmJmam5ydnp+goaKjpKWmp6ipqqusra6vsLGys7S1tre4ubq7vL2+v8DBwsPExcbHyFwBy8zNzcnQ0dLT1KTO18zV2tvc3d6A2Njf4+Tl5ufK4c7o7O3u7/A46uvx9fb3+Obzz/n9/v8Age3LFrCgwYMI3wwI0KDKwGUJI0qceK6AAwEF2ixsyMJiggg2GuwbAOBhAIooU/+qhGaAoZuNLggEOBDyok1mNh+UfLiyp8+fuRQEIPnSpYsERnGIhGDCJNCnUKOmepCUDcyYM3UsbcpTqtevYC9ZxJhCwQGkAgYcUHCiAdW0CDKWKHBWwLIDckccaGYAQIOFafMCKIAAcIK1JGDWPSyYxNjGNLaWcBq2suXLh2T2RSFy2QC0mxN7hmBT8Ni0pJexHWF2cWcDpEl09nxXtEzUnimgoMrxhmQSlDELH07cTksCKZAiH1Gg8V4IeREMnUCCgODbJ07TNDEWL2vBgB/IFRpguQnp231Pn9y1uPv38M1chDzCLn3mEEuQ7o0CKeTTEpywV2j95adXADq1tZ7/ekyxN1B8EEYoIRbL3AcAVQtYiN55WamwQFUijMUfcCCasFB6Ivw2l4E1NUjigxPGKOOMRjyWQgHh0deSOgQSBlgzqzlWYXZDqnAVawuueJJSSYoQHI1QRiklDTaq8JdnQYpAmloLdOnlAgggOdQBCJSJVJYhFqkkdUaCKNRHRAZg4QsqOtnelHjmqaeQS7KgQGrmXVgeCxcFKiiag6nJ55wAHJlik/j12SJXMO5p6aVQKroCdiNsqIJHJpZYJVeI6ucmpGkKkEOdO1WK6auwwmffC6PWasKbSiJq64EonuCoX6gCSxSDlO4T67HICkeVocxZVx2LgiaQJWFyiSim/6iaihkXc/z9iusJnhLroLHJlmtuVOEqmQAEqZW4owPsXiTngUOldkBLQRJwb2pqMTAiALfRhqBoI34b6r8zsPrkuQw3PNGoJRDwAFqMcVZXWojJxm9D0gWpDpxzXXxYlt6iumvCkC7s8MosA7RfIJoNoXLLNNcMD6t7+CfznTb37PM5+P4R886u/mz00dtAnAeoRMyM9NNQRw2D01JXbbXVVF+t9dY+Z83112CHLfbYZJdt9tlop6322my37fbbcMct99x012333XjnrffefPft99+ABy744IQXbvjhiCeu+OKMN+7445BHLvnklFdu+eWYZ6755px37vnnoIcu+v/opJdu+umop6766qy37vrrsMcu++y012777bjnrvvuvPfu++/ABy/88MQXb/zxyC
ev/PLMN+/889BHL/301Fdv/fXYZ6/99tx37/334Icv/vjkl2/++einr/767Lfv/vvwxy///PTXb//9+Oev//789+///wAMoAAHSMACGvCACEygAhfIwAY68IEQjKAEJ0jBClrwghjMoAY3yMEOevCDIAyhCEdIwhKa8IQoTKEKV8jCFrrwhTCMoQxnSMMa2vCGOMyhDnfIwx768IdADKIQh0jEIhrxiEhMohKXyMQmOvGJUIyiFKdIxSpa8YpYzKIWt8jFLnrxi2AMoxjHSMb/MprxjGhMoxrXyMY2uvGNcIyjHOdIxzra8Y54zKMe98jHPvrxj4AMpCAHSchCGvKQiEykIhfJyEY68pGQjKQkJ0nJSlrykpjMpCY3yclOevKToAylKEdJylKa8pSoTKUqV8nKVrrylbCMpSxnScta2vKWuMylLnfJy1768pfADKYwh0nMYhrzmMhMpjKXycxmOvOZ0IymNKdJzWpa85rYzKY2t8nNbnrzm+AMpzjHSc5ymvOc6EynOtfJzna6853wjKc850nPetrznvjMpz73yc9++vOfAA2oQAdK0IIa9KAITahCF8rQhjr0oRCNqEQnStGKWvSiGM2oRjfK0Y56//SjIA2pSEdK0pKa9KQoTalKV8rSlrr0pTCNqUxnStOa2vSmOM2pTnfK05769KdADapQh0rUohr1qEhNqlKXytSmOvWpUI2qVKdK1apa9apYzapWt8rVrnr1q2ANq1jHStaymvWsaE2rWtfK1ra69a1wjatc50rXutr1rnjNq173yte++vWvgA2sYAdL2MIa9rCITaxiF8vYxjr2sZCNrGQnS9nKWvaymM2sZjfL2c569rOgDa1oR0va0pr2tKhNrWpXy9rWuva1sI2tbGdL29ra9ra4za1ud8vb3vr2t8ANrnCHS9ziGve4yE2ucpfL3OY697nQja50p0vd6lr3uv/Yza52t8vd7nr3u+ANr3jHS97ymve86E2vetfL3va6973wja9850vf+tr3vvjNr373y9/++ve/AA6wgAdM4AIb+MAITrCCF8zgBjv4wRCOsIQnTOEKW/jCGM6whjfM4Q57+MMgDrGIR0ziEpv4xChOsYpXzOIWu/jFMI6xjGdM4xrb+MY4zrGOd8zjHvv4x0AOspCHTOQiG/nISE6ykpfM5CY7+clQjrKUp0zlKlv5yljOspa3zOUue/nLYA6zmMdM5jKb+cxoTrOa18zmNrv5zXCOs5znTOc62/nOeM6znvfM5z77+c+ADrSgB03oQhv60IhOtKIXzehGO/rRkI40tKQnTelKW/rSmM60pjfN6U57+tOgDrWoR03qUpv61KhOtapXzepWu/rVsI61rGdNa0CHAAAh+QQFDQAUACycAAwAHQAgAAAFliAgjiQZnGiqluyovmnbwrTM0rBd4q9u8jGfCBgUElHC4TGQBCyZyWdTOmo8HIIEouBajhanBATr4Dq9gEMAYpYg1pPzEVA4tUXjhpwIeB9YfntAaTwGgjwAYwMLjI2MCIc4hAQ6UoE2UnUBdztoBmsKJAUMXFQRnycQY3aRNVUHCQJZB3qtOVFoRrk+pry7lb+YwTJPIQAh+QQFDQACACyqAAwAHQAgAAAFfCAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBg05H5EEeHANAYiSCCrcIK6iM8SNXtNaqtR3hTclX65Q+wYLcKyAVtr2kuKh3FreVt91gPceXc0gWVifYI1h4V4AA0ITBAnD0wEBX98AzyWgEJ7dEKcnaGgfKKlpJ8+biEAIfkEBQ0ADQAsuQAMABwAIAAABHYQyElpuDjnyqf+WNeBoMiRn1mhmmqxoSvBIfEpKx0AyuEfggDuReMkhLli5Tj06IxIIgzanD2XUaeSwkxOsVWAbgf2sqhmFFp6LrPVbu2Xm7VuJ903iUdg+IIDBgcIBWJPDCwEhncqYzJ2cy6Oj5MylZJXljoRACH5BAUNAAIALMcADAAdAC
AAAAJFhI8Yy+0Jj5stRootxFQnPnkKWIkGWZooY55r0AIv3M6xXb+3nq+730P9hEHS0FgEHZVJztLZzEQ701D1cU2JcCped1UAACH5BAUGABIALNUADAAdACAAAAWLICCOZBmcaJqWLKm+aCvDsNzSr83iqm7yMd8IGBQCiCejCBlQHpFOZhSKCBQMAkfjmm0sqYHEYHGCOLCCyJMIqDrUZivgEFCsge3AQVR98AMEdzx5gYR/gVJVXoaGiXWHkI6LBICRYJOVjZeQmmyKnJSIm4yhgjh5mIWlUkqsRq5CsD6yOrQ2tjNIIQAh+QQFBgAUACzkAAwAHQAgAAAFliAgjiQZnGiqluyovmnbwrTM0rBd4q9u8jGfCBgUElHC4TGQBCyZyWdTOmo8HIIEouBajhanBATr4Dq9gEMAYpYg1pPzEVA4tUXjhpwIeB9YfntAaTwGgjwAYwMLjI2MCIc4hAQ6UoE2UnUBdztoBmsKJAUMXFQRnycQY3aRNVUHCQJZB3qtOVFoRrk+pry7lb+YwTJPIQAh+QQFDQACACzyAAwAHQAgAAACRYSPGcvdCZGbLFpKbcRTQ+54CliJB1maALqoBhu4KyvDNe3aOa7qPW/yBYEiYZHoMSaRGmWTeYFupB/qCHXD7rQ/7hBVAAAh+QQFBgAVACwBAQwAHAAgAAAFliAgjiQZnGialuyovmjbwrDM0q9d4qpu8jGfCBgUEk/C4TEJOAaYTuhS1Fg4ThDCxDU1YCEJrERJFBUakVFhEEA3p61G4EAG2gqBR503ql5Tem9lEl55DAgIB3l7OAAEAQMFJHiBUV5ufYuCdhBtJV6VS4oGWyKKmlF4XydyoYMFB2EJB5KocD5RSblGtzq7uL02v75HIQAh+QQFDQACACwPAQwAHQAgAAACRYSPGMvtCY+bLUaKLcRUJz55CliJBlmaKGOea9ACL9zOsV2/t56vu99D/YRB0tBYBB2VSc7S2cxEO9NQ9XFNiXAqXndVAAAh+QQFBgAEACwdAQwAHQAgAAAFgSAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBgsKQyOU4IBIJ5YChQE4jg0nSyqYfIj7gKKr/dXEAMBhccgIahdvekpNbCWR95nspn3LXfzLgF+gWOBg0NYen98ioSAiIciTgF9eziVi5eNiIWQljRCnI8+k6GSiaGlpqqpqEKTIQAh+QQFEwAUACwsAQwAHQAgAAAFliAgjiQZnGiqluyovmnbwrTM0rBd4q9u8jGfCBgUElHC4TGQBCyZyWdTOmo8HIIEouBajhanBATr4Dq9gEMAYpYg1pPzEVA4tUXjhpwIeB9YfntAaTwGgjwAYwMLjI2MCIc4hAQ6UoE2UnUBdztoBmsKJAUMXFQRnycQY3aRNVUHCQJZB3qtOVFoRrk+pry7lb+YwTJPIQAh+QQFFAAPACw6AQwAHQAgAAAFkiAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBgUAognowgZUB6RTmYUOlIcEgLHoeCiAhAoCDbQGFIbJ0X1xH0Sl+QSmAF/KwIJ1n1QB379bn5gWguFhoV9PH8LPlJ3DhI6UhNZZTaTBGkTIwUEaoGKIoMCA2KaoDgknQNZCQYEEYmpSpNGtUK3jV64u7pvtEghACH5BAUTAA0ALEkBDAAcACAAAAR2EMhJabg458qn/ljXgaDIkZ9ZoZpqsaErwSHxKSsdAMrhH4IA7kXjJIS5YuU49OiMSCIM2pw9l1GnksJMTrFVgG4H9rKoZhRaei6z1W7tl5u1bifdN4lHYPiCAwYHCAViTwwsBIZ3KmMydnMujo+TMpWSV5Y6EQAh+QQFBgACACxXAQwAHQAgAAACRYSPGMvtCY+bLUaKLcRUJz55CliJBlmaKGOea9ACL9zOsV2/t56vu99D/YRB0tBYBB2VSc7S2cxEO9NQ9XFNiXAqXndVAAAh+QQFBgAMACxlAQwAHQAgAAAFrCAgjmQZnG
ialiypvmgrw7Dc0q/N4qpu8jHfCBgUAognowgZUB6RTmYUOiIMBIKHIhJ4LKmSxSkBwSK6X6KIkKyevE+1xBFolMpwaaFNYuehDQEJLApocUAAgQMse39qioyGenwjgY6IE1h2JAaSVGcOBW6echMPY2VdpJhrVwIDDYWXPDqyaawtbAe3tDZlBLw4a5ujooe9p4ILyXVDVAUEDwlYCQfGwTRTRCEAIfkEBRQACwAsdAEMAB0AIAAABZ4gII4kGZxoqpbsqL5p28K0zNKwXeKvbvIxnwgYFBJRwuExkAQsmclnUyoqIAYCAYQwcS2rjhMEGxgUlEdRInCIVB9lifPbOMl/innaEOCWDmx6RABYDSwEAQ+CQHN5JXUDizyEAY4kiBCSOAB8fiSAbVKQd155VGsGFG9xmjUABWFaZAlnrTlVV1kDCG5og1FfwGnCv0bBxsPIxT5PIQAh+QQFBgAGACyCAQwAHQAgAAAFliAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBgUAognowgZUB6RTmYUOmpAHIIBouCiRq6BAVigGFIPYa4IkVxCCyf16BFAuIkAQuDBaqSfeAx1LHBNgEAAaASEbVKKjIZSenwlfgl3iIVyInQMmDwiaANybAJqUpwnYgInZZ84JFasCQebqEa3Qrk+uzq9Nr8zXrpIIQAh+QQFDQATACyRAQwAHAAgAAAFlCAgjiQZnGialuyovmjbwrDM0q9d4qpu8jGfCBhsFU5DYsB2XAKUTiPyqWROoVYnVtR4OASDRjNJFDFQkO/hWlWcCCNCELsIHEoGdlkQUJQaekBNBSVjVESGJIZQg4WBPBJ8hCRuWlV1DHiPOACVDSOAmzRmJwleAQh8ZECgaQFhAAOWZUKHrLVbQrk+uzq9Nr8yUCEAIfkEBQ0AAgAsnwEMAB0AIAAAAkWEjxjL7QmPmy1Gii3EVCc+eQpYiQZZmihjnmvQAi/czrFdv7eer7vfQ/2EQdLQWAQdlUnO0tnMRDvTUPVxTYlwKl53VQAAIfkEBQ0ACAAsrQEMAB0AIAAABZEgII5kGZxompYsqb5oK8Ow3NKvzeKqbvIx3wgYFAKIJ6MIGVAekU5mFDpSHBICweCgGFIBDdQAGzB4iSMsQTIqFM5A73v2fZTnu29hcDrg4TwlDWQQXYA4LQoQJ2tLXywESY5oMgUnc1IABQR/kQ6HNCKWAQkQiycNoDUiBA+Efi6PPplGtEK2s7I6uLu6NkwhACH5BAUGAAYALLwBDAAdACAAAAWGICCOJBmcaKqW7Ki+advCtMzSsF3ir27yMZ8IGBQSUaQCQTE7BkiNAKJ5JBGkVCLpgL05C5AEbQpwBsBhxykcJgyd2+4OPuKS59W6/JcX2bNAcXd8WnqDLnR+e4h9AAwBB4A8UCcFXo0TA2sPmm+YEQiaJ5FliT5mQp6FRqY6qKmvrJ8yZiEAIfkEBRMABgAsygEMAB0AIAAABZYgII5kGZxompYsqb5oK8Ow3NKvzeKqbvIx3wgYFAKIJ6MIGVAekU5mFDpqQByCAaLgokaugQFYoBhSD2GuCJFcQgsn9egRQLiJAELgwWqkn3gMdSxwTYBAAGgEhG1SioyGUnp8JX4Jd4iFciJ0DJg8ImgDcmwCalKcJ2ICJ2WfOCRWrAkHm6hGt0K5Prs6vTa/M166SCEAIfkEBQ0ACAAs2QEMABwAIAAABYYgII4kGZxompbsqL5o28KwzNKvXeKqbvIxnwgYFBJPwuExCTgGmE7o8pAyABoDgSBRiCoYB0hg0DgZIInmctTVHiY/Iqk9kOzWonbjhge0C3xybCeAd4J5hIFAc4mGi4MBhXGPiJGKPAAEB2cnEAYMe0pyOGmilD5RSalGfTqrqK02r65HIQAh+QQFFAADACznAQwAHQAgAAAFoSAgjiQZnGiqluyovmnbwrTM0rBd4q9u8jGfCBgUElHC4TGQBCyZyWdT6ggUWIpAwrkEIAII1gHMPQ
IK2snOWiaKFoEGKQtRmgF00ljeBrrYShR2biIEZHgBBn93Z2kAe4uEb3ESVRKRfiNZBlkMP4wiCQJjV5g8JYaOpjg3h6s1JaKlrzlzia2gj5RroA2qn24IBhAnCjNdYwIQxse5Nk8hACH5BAVJAAIALAwADAD4AUAAAAL/hI+py+0Po5y02ouz3rz7D4biSJbmiabqyrbuC8fyTNf2jef6zvf+DwwKh8Si8YhMKpfMpvMJjUqn1Kr16hJot9yu9wsOi8fksvmMTqvX7Lb7DY/L5/S6HT656/f8vv8PGCg4SFgIlmeYqLjI2Oj4CBm5hShZaXmJmam5OUfJ+QkaKjpKeudZipqquspqedoKGys7S7v2Woubq7tLesv7CxwsTOg7bHyMnIxWrNzs/CzMDD1NXb0qbZ2tve0qwf0NHv6ILV5ufv5Gjr7O3u6l7h4vLw4/b39PXY+/z2+s3w8wIK5/AgsaVEXwoMKFnBIyfAgRksOIFCsSw4Ixo8aNxBsDePwIEiTHkSRLmjwRMuXHkyxbunzpQKVKmDRr2uwoM+TNnTx7Msmp06fQoUR3ABVZNKnSpSuOrmQKNarUDU49Tr2KNSuDqgG0ev0qlSvYsWSHii2LNi3Ms2rbuuXI9q3cuVPi0r2LN4ndvHz7/tjrN7DgGoAHGz7ctCrixYxTFG4MOTKGx5IrW46p+LLmzQ0oc/4c2TPo0YhFkz4d2DTq1XhVs3791jXs2Whl07791Tbu3Vd18/7N1Dfw4USFEz/OswAAIfkEBcEAFQAsDAAsAFsCYAAABv9AgHBILBqPyKRyyWw6n9CodEqtWq/YrHbL7Xq/4LB4TC6bz+i0es1uu9/wuHxOr9vv+Lx+z+/7/4CBgoOEhYaHiImKi4yNjo+QkZKTlJWWl5iZmpuGKiwKnGIrLC13pHgKLZ4sKqV/oxRyJym0tSuhYaOuVKMuZqu3RLUnVAq0u7hdq7F0sy7ESqMpBUXGtBJE0r7FtSreyHsFLSkK1EcnqiyswUW17insUC3z6fFUBSugSwr2gfzYS5ZV6dXln5FV24RYm3bPk75kXQjOkeZOxcMhFBkSWQhNCMIqnsD1ObHKHbhx71IkFDKrmzoW/aKMixnFGE0inlAUyslEIi//lco04mTFQqeQcZ46QlyKBCXSdEpbwkxaLUXIIQWsAqWi7uLIly5UjltJwRM7a/GMkc0yE4suJuqMDopbxqcWukVCkhuiVynTvyxpUTsFwBwRF8GoblRpFSPjtU/U+eUzFgAsJJNHqVgsEkvbK5+TZBUaaLRcUVu1mD4IL7VYofhKuvAK4KWtI0BTqVNBc8Uq3qedwEJaLqzFIivC0gLezpfudcGZKDbiTEjGa0bGnRCHcDKA59ClGNtcu7MRyVWhHm0NWbQLb7bdGV2I3fhxjL/tGeOn7tZTKQ5ZRto+2FkHjxehGZGKJ61E9V58t1lXUkrR+babfu8UZh8odvG1/9cTFqZUoIAZihZiKxd1iFRwJ9oToog6JcWKgQpM95I3wCBHT3+48UZKcrsVQdJyv0W3hGatEImSXCjhuBxAQlilC5CpOTEObQa6gs48AhUx0zchetUkfAdG0RI2hC2BHmej6MOKWlAUgKNsOBp1QnLKteRCWIbJ9tJD45Gym6AfPhHWLZc5MV42ZXKR4GFE5sgXmXMyZ5mTVs152pCsFIkVnpKps2dRUZJKxGogYlqpXFs+aKqCtn3k2EornvqbpxJSqikASWnH6zTTKXCRNYZ52agwI4520VUempeENDEKBsCVn7LKbKmvKgvFWxq2YA63p444hFOQkhdYoShg2f8Eg7M4i2yx36lUnVpwcjUgazNmZ5Vh0CokrTQcHtuENCsk2kRl44q15wreSfGogZ8MYQ1tDwspbl5VhtUZrkwK3OETqCpx5jnHGFYOown1OxQyGlN38a8n2IRCY9MdUfG0ApdqT2ianXdvNFtJBO4R9eps7E1EB01LMHy6/GrCT48cmLlXcL
ovXAPCmVObRUuxZhJdhjvgof6SZw021cnkDtJiK4XSO+5amfOvLjZ29BIhI2f3u/jCOzXG6jKRN2Yv49zerP4+LSDVpTKD1csBqiRzzUYMDWlMtIiZWoDGBn4Et6DP/fhKmRsbt8XkyUp5YS8jzPe5bG87oedf++v/i3Y5dR1F7UdcizKsZm81C3lpm/l2mmBXqZA5dx4DWs5Sv54w24N7SDG6Hik/1ENZMR5n4RYrXqrnApYyMWvXe1V9gH3BnI1yazc1t7S/Yyui6EiEHszQC8ZH+oAfUxOpVMKTl0UvYSIR10IYdhcy3YR3cFJLbrQXmZ+tzGbaG1nRhEdB4TBmbxj03ufoR4WHLcpnfrnZ4ywIoXf0w3d624XlvmdBsZGshllqCebut7TwRSdA0kidRuR0jIIBiSZkQ2EMh3CMPTnRieTT2/7KBC7lLAxI/3NclqKQlCmRo3g21FcC70URb2HhFGdDAgS38qd4HQ5rDbMeBvuxmg1u/wOMUYBFWUS3JCdwroQ5C+Af10M98DXxiVBEnxLwsjqQgS+MPjwSAZ/kM0Q+UX2Q24uesgcN1xkIifMDoGtwCCIqmnKKKigWGC8WQCVoRzs20d25MDhGLWZjFVFUkysaWZusbaWPsqxgHLOHpYdpkIN37ODAGBLMWvkRfwfL2QnbgT0VzlKNpNyeKw/UPStUz2k3tGXlaqGbWjphfRp5DzQUw7ubJRGSiMte7JY5xXquxytdY6Uy80cOb+hsbgdcjzmT4MkpEIZ3TPTlNmrkimA6AaGAG+FaTmjHwLxRkrGYoXH8poRBOgx6FxucNVlnQRh2FHuoE9A8RfPIFWKTfP/9okBXMLZSdGKJnQMyKSHBuUQDiVAK+vskMY2VxZ4+wSZbMQtKSUpLJSZhmtuCBinEAUJqwiuYDm0CRLXptBRupaKsu6iCTgaL85WLoyzN5jZjAsOemQ6O/Pxp75YKKXSoNa1ohSek4uaTNDpGrk7lKsbW2Siq8jGUtrQLg4qVijyeUqiG61iV9Hk61H1IMz8LaGThKazTtMxry6lIHNcImazC9aTqciYKVAZWPDLhePEbCmBRMBsoaeN5MQGYxGZaFfFhzFmL3cjG6LrbVkJBp5GE1bFiVr/1MC64EhuuSDzKycQlSUqFTc48wNKCfFyzuYWh00sca0+VnqtTmZv/rCgrS02NLCQb2s1Pd5lR0JkJJYhk8m1PXPWj6PCjYEUsmO2SJtYkQDQV27XKwoKjnPwCCpkWNV5JGqQ3l8TqvBMqMHUKluBWdPciY5IUvpIkwoU8KB6cUlV0O7xgbBL3qLU4MUbi28T5MopIQYpns2wVqfEql1C3oK77FGKfW8gscfdL5nor5yoKb+ux5mVJOiis3sTuEwnQtS/VJkQhgQY2Nr8x4xWQV+GULJSCphWgd+DXZeSEmbEQDutAfnadd5irf6xoQS6TxkOygEfP0QgzEsBzn/e9GWow4qd+a9LgQnP5HX5BR5hTpLyWiKnJYlZQo/cMmE7jhr2exmio/82Q5lGb+tSW4TSqhbTSVVOBl66OtaxnTWsyDK/WuM61rndtr1bz+tfADvZfJJ1jYRv72Mheip5AnexmO/vZ0I62tKdN7Wpb+9rYzra2t83tbnv72+AOt7jHTe5ym/vc6E63utfN7na7+93wjre8503vetv73vjOt773ze9++/vfAA+4wAdO8IIb/OAIT7jCF87whjv84RCPuMQnTvGKW/ziGM+4xjfO8Y57/OMgD7nIR07ykpv85ChPucpXzvKWu/zlMI+5zGdO85rb/OY4z7m6A8Dznvvc5zoPeq1/TvSeC/3osS560ZHO9FMrnehNj7qnn/5zqVudKVQH+tW3Hv+KrBud62B/wwAC0IAzeJ3nYSd5ARwggLwSYuxlF5wAEhABK5w9AGkfuQHI7gi4O4EAATiA3c+e95ArIAADeITfnZAAvlPh7oUH+QMc34jFNwHwBqgC5CPf8bW3/akHaPzcD4
ClBkx+7gjwWwFCLwCeHwBeB/B55hsw9rmrEgG1TwDpieB31userZ53uxI2z/mNY/6pPR+A6DNfhAXwPAEQYLsDVJn86PN8WAzwfQNcH/2NtP75rud9AAA/d9HTHQmTj7sUiF/8jO+dAEloPPyxAnvEGwYB9icCAYpF/nM8X/DU4XomUyy19wDmcHjjhwT4B4DrR3jtp3Fsl1et53b/wdd8lIcEjad6zwclRBB7zBd/aNeBAfAAg5Z/Deh1D6hxPJdXk7cAebWARgCDSuB8tOF56lcErUc+Y8eACoF4mBGCJ5h1KYhxFYgZBchRe/d0H1gYuMd2PleDK+h/ASB8lteD5xeAeDcF7DeEE1eEyPd8XhF9A7AAZFiGZIgAEtN6uocAbNh4UDiFUjgBS1CF32GCRhCFQUh1XGhxXqgECmB98ycE78cEbBeIgniBLIGHQqKIGIiIh3eFi5iFefh0e2hxjLgE/TcEMigadigEdFgYlzgE1zeHjtiJWAGEUbCFlQhxEwgyitiHCuKDpzKKkYhWzseDR0CHj1iCiaeF1A64ihI3eYZ4Kvunf6gIAHuXABexesxDi3XojKcIh0TDc6mHFTeoi6YoBJuYir8IjBC3jbOIeBBgfYgYAUnoAOPohIYRe+LIdgewdw9BAO9ofWPIADeojbLHcyQ4BNgIieJ3j0+git7IcLCoiQ9gfq83aKw3evdIe+VXdvj3EE/njyyxkLp3Ef0ohcKXBAI5kAsXfQC5Ccf3eN3okQ23fb2YDBmoeSVpkgwHj8kwkiSJgi75cAV5CZ5XdyxJkzXZk2rQkT4ZlGAAlEJZlFtAlEaJb0EAACH5BAUhABIALI4AbAAcACAAAAWzICCOJBmcaJqW7Ki+aNvCsMzSr13iqm7yMZ8IGBQST4NAQbYIKI6BpkKWLEAPTplDCSUECFoBANr4isoJV5ocFWGVAOtjfJSLEskG4HmgEyMBA3FfAn1lCH5AYw57Tg+Ch4k8AEkADEoImFlQAAZKEJBOmVOcWFZ9gAimkjiWhHqUEJ4RrDSNs26FYrU1T1sjaGm8OVZtIsVzwz0TJ2AjCcacQsor09JGR9PURT7X3dnWRyEAIfkEBQYABwAsnABsAB0AIAAABYUgII4kGZxoqpbsqL5p28K0zNKwXeKvbvIxnwgYUwQSOiIKYETalCfmMQmVOmXQgJWq3CogAgFE4ao2GUfwiTA0oxojQxSQlQbYo8KJXDcGCiUJAXB9cyRghG5XI4h0iiyNhYsikY+Bg45dTZCYkpyJmlOXoERCbV2mdalVq6hCqq+ssUohACH5BAUGABMALKoAbAAdACAAAAWVICCOZBmcaJqWLKm+aCvDsNzSr83iqm7yMd8IGBQCiCejCBlQHpFOZhQqKiAggtNC4aJKHKcE9oQYehvckSJbWHpbh0DDTdQRAgQ6kER4gFN5T3UABQlaCIh+gVIABgEHEXx4ejxubZKLVEk/mYOGgYRjnXsADWEQhgN3o5UiCgtgqhFrrDhTg0aMuW8+ukK+vbw6TCEAIfkEBRMABgAsuQBsABwAIAAABYIgII4kGZxompbsqL5o28KwzNKvXeKqbvIxnwgYFBJPpAJBMTuSGgFEk0giRKdA0uF6OxYgCZoUcAx8wY4TGEwYOkfb8e4timN5Wu6cCtf/+HV+LnQAdl2AhYJuiAwBB3c4T0iHWSQDag+Xi5VVlyePZIQ6ZUKbeKWkqKI2qUarMmUhACH5BAUNAAIALMcAbAAdACAAAAJFhI8Yy+0Jj5stRootxFQnPnkKWIkGWZooY55r0AIv3M6xXb+3nq+730P9hEHS0FgEHZVJztLZzEQ701D1cU2JcCped1UAACH5BAUNABMALNUAbAAdACAAAAWVICCOZBmcaJqWLKm+aCvDsNzSr83iqm7yMd8IGBQCiCejCBlQHpFOZhQqKiAggtNC4aJKHKcE9oQYehvckSJbWHpbh0DDTdQRAgQ6kER4gFN5T3UABQlaCI
h+gVIABgEHEXx4ejxubZKLVEk/mYOGgYRjnXsADWEQhgN3o5UiCgtgqhFrrDhTg0aMuW8+ukK+vbw6TCEAIfkEBQ0ABgAs5ABsAB0AIAAABZMgII4kGZxoqpbsqL5p28K0zNKwXeKvbvIxnwgYUwQgOiIKYETalCfmMQmVOmXQgJWq3CISAseh4Ko2B0dIWKAYmlFt0SEKyEoDjR2+/taWGAEHfF1NLARTdoUlDYhvA4aNhHQkgIKJJ3k/eZeTE3MOEYNEUgdoA2oncZcEEV8nY2VdQqJAs7Q8tnazukK8Pr5cRCEAIfkEBQYACwAs8gBsAB0AIAAABZAgII5kGZxompYsqb5oK8Ow3NKvzeKqbvIx3wgYFAKIJ6MIGVAekU5mFDpSGByCxKHgogIYpwQEGyAMqY2ToqpeUrGNEiEAcRMBhWQpH+BK0wM3AWt/dC0Jg093gIKEUHwTeyd+b4kkc4GKQCJpAmucbZo8I2B0YycIZ3dVV6ifdptKUrJeQrNGt7a1Prm8SCEAIfkEBQYACwAsAQFsABwAIAAABYEgII4kGZxompbsqL5o28KwzNKvXeKqbvIxnwgYFBJPwuExCTgGmE7o0hAglArI5rR6zUapVhL2qSUCwF3ylyv2bsOjsdKMbqvf6TnwzB413HQBByVUd2Z/AQojBEFREwMnCRAOVQKGewAFDAknD4qQejxSZkmORkulqKekq5itPCEAIfkEBQYACAAsDwFsAB0AIAAABYUgII4kGZxoqpbsqL5p28K0zNKwXeKvbvIxnwgYFBJRpQZBcjsGSoNn87iTVqmjwmkyJQII0RrA+QRDwuc0l0zSWn9YkZt79Wa3XWAbX9ff3y5Oe4BDgn90cHZyfIl+IwkBB0yNPEkoZw4NY4YkCgYOJwMKm3E+bEKkiqacq6U6p0asr04hACH5BAUTAAIALB0BbAAdACAAAAJFhI8Zy90JkZssWkptxFND7ngKWIkHWZoAuqgGG7grK8M17do5ruo9b/IFgSJhkegxJpEaZZN5gW6kH+oIdcPutD/uEFUAACH5BAUGAAwALCwBbAAdACAAAAWIICCOJBmcaKqW7Ki+advCtMzSsF3ir27yMZ8IGBQSUcLhMZAELJnJZ1M6KiAGAkHioFAeR43syXE6eImiQrkAZjuXokPAYKNmuzKpOuCewfcTdX8nfTdwEid4fl8AC3OCjAonCH0FDWdAIwQpCScPmDwkBQedWgZ4VEaHqoysaFGrPqmysTpPIQAh+QQFDQAFACw6AWwAHQAgAAAFeSAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBh0qCA7YkB0aB4gAeSPyGpEk9SSVepSVq9T4Jc79GrB3SxpixWfySLl8t3mjeu4e9hO3+f7aW5raGVqI2x+NCIECE0DJwsHCARxZm5yQoWCQpiZAJ2ZoJxmoaSjRCEAIfkEBQ0ACwAsSQFsABwAIAAABXkgII4kGZxompbsqL5o28KwzNKvXeKqbvIxnwgYHCkcg8KOGCgxTo0lsXRMKH9TIYDZ1HIBhdfVxQQjDgbIaTwsL9lb9w/+fUuBtwBdTt5n53c8eX54dliFgIeCCmdpJwMGBwhKX084BHF/PnVCnJt8Op6hoDaipUwhACH5BAUNAAsALFcBbAAdACAAAAV/ICCOJBmcaKqW7Ki+advCtMzSsF3ir27yMZ8IGBQSUcLhMZAELJnJZ1MaYDQSAkPkmo04lxOUwRGABMbV7zGcVpwOgEJZTWQXlAolHch2Qb97PH16elQOfoiGiIVgJ4uAioSQjYeSAkyRgIxrjpaYYFGgRqI+UqFrp3WpfKs8IQAh+QQFBgATACxlAWwAHQAgAAAFlSAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBgUAognowgZUB6RTmYUKiogIILTQuGiShynBPaEGHob3JEiW1h6W4dAw03UEQIEOpBEeIBTeU91AAUJWgiIfoFSAAYBBxF8eHo8bm
2Si1RJP5mDhoGEY517AA1hEIYDd6OVIgoLYKoRa6w4U4NGjLlvPrpCvr28OkwhACH5BAUGAAgALHQBbAAdACAAAAWEICCOJBmcaKqW7Ki+advCtMzSsF3ir27yMZ8IGBQSUSVCY3YMkAoBCPP4jE6JpIb11gQcBgKYFNB0GiDoExp9GHZH2vHuLYpfgdntnArX//h1fi50AHZcgIWCbogKJ3c8VQFLe1gkB2oPYAVkhHUQYQ4Qm2VCi5VGnTakpauoiDqtPmUhACH5BAUNAA8ALIIBbAAdACAAAAWSICCOZBmcaJqWLKm+aCvDsNzSr83iqm7yMd8IGBQCiCejCBlQHpFOZhQ6UhwSAseh4KICECgINtAYUhsnRfXEfRKX5BKYAX8rAgnWfVAHfv1ufmBaC4WGhX08fws+UncOEjpSE1llNpMEaRMjBQRqgYoigwIDYpqgOCSdA1kJBgQRialKk0a1QreNXri7um+0SCEAIfkEBRQAAgAskQFsABwAIAAAAkSEjxjL3QmPmyxGSi3EUyfueApYiQZZmuhinmsLrAEs029b47e6izmPsgV1QyBJeCQmjSBkU/lkcpxTaFWKoWatWyylAAAh+QQFBgAMACyfAWwAHQAgAAAFiCAgjiQZnGiqluyovmnbwrTM0rBd4q9u8jGfCBgUElHC4TGQBCyZyWdTOiogBgJB4qBQHkeN7MlxOniJokK5AGY7l6JDwGCjZrsyqTrgnsH3E3V/J303cBIneH5fAAtzgowKJwh9BQ1nQCMEKQknD5g8JAUHnVoGeFRGh6qMrGhRqz6psrE6TyEAIfkEBQ0ABgAsrQFsAB0AIAAABZEgII5kGZxompYsqb5oK8Ow3NKvzeKqbvIx3wgYHAQaOuJJZETalAHmMamUOmdVQJNKtBYOiUCCURhmjQSHYBAWU0RQqeBRBigEAQL87CYxAg97XVoBByUNYoJAVod9cYRXIncJijyMJJOVOJcjmQCPW5iOfJF2o4OhnaeLkI2Un1lCsIOyj7WxQra5uD66vUohACH5BAUGABUALLwBbAAdACAAAAWYICCOJBmcaKqW7Ki+advCtMzSsF3ir27yMZ8IGBQSUcLhMZAELJnJZ1MqaiwcJwhh4lqKDFlIIitRHkWFRmRUGATUTq+sETiYibZC4HEHjqxYKXxxZxJgewwICAd7fTwABAEDBSR6g1JgcH+NhHgQbyVgl16MBlxfZJ1+emEndKOFBQdjCQeUnFRGcrqFvHhRuz65wsE6TyEAIfkEBQ0AAwAsygFsAB0AIAAABYwgII5kGZxompYsqb5oK8Ow3NKvzeKqbvIx3wgYFAKIJ6MIGVAekU5mFBoJPACNhyOQQEyGVC5CcEAcBFYwcZkcFU4K9roa8JIMdTmQHScxAgd6PGwFJWOBT3MnhX6AgjiEho6Je3SMI4ePNJGNiFKWkp5hAZcimZSDoJ2aNUqfRq9CsT6zOrU2tzNIIQAh+QQFBgATACzZAWwAHAAgAAAFnyAgjiQZnGialuyovmjbwrDM0q9d4qpu8jGfCBgUEk/C4TEJOAaYTuhSpHgIBIMGYPBsTgkoiCPAuCqJgMJJS712owZyCYH0oq+Kkvp93BdufER+gGdAgzuBQBJ4enVwciRgiTwACmsjlo5fYQkBVpM4mJ8JCBGaaDanijJ7hZQydAauoQ0Ef5iMdoqSCQ8LnZC6lAUNBlxYBnkuU0ZHIQAh+QQFDQAUACznAWwAHQAgAAAFliAgjiQZnGiqluyovmnbwrTM0rBd4q9u8jGfCBgUElHC4TGQBCyZyWdTOmo8HIIEouBajhanBATr4Dq9gEMAYpYg1pPzEVA4tUXjhpwIeB9YfntAaTwGgjwAYwMLjI2MCIc4hAQ6UoE2UnUBdztoBmsKJAUMXFQRnycQY3aRNVUHCQJZB3qtOVFoRrk+pry7lb+YwTJPIQAh+QQFBgACACwMAGwA+AFAAAAC/4SPqcvtD6OctNqLs968+w
+G4kiW5omm6sq27gvH8kzX9o3n+s73/g8MCofEovGITCqXzKbzCY1Kp9Sq9eoSaLfcrvcLDovH5LL5jE6r1+y2+w2Py+f0uh0+uev3/L7/DxgoOEhYCJZnmKi4yNjo+AgZuYUoWWl5iZmpuTlHyfkJGio6SnrnWYqaqrrKannaChsrO0u79lqLm6u7S3rL+wscLEzoO2x8jJyMVqzc7PwszAw9TV29Km2drb3tKsH9DR7+iC1ebn7+Ro6+zt7upe4eLy8OP29/T12Pv89vrN8PMCCufwILGlRF8KDChZwSMnwIEZLDiBQrEsOCMaPGjcQbA3j8CBIkx5EkS5o8ETLlx5MsW7p86UClSpg0a9rsKDPkzZ08ezLJqdOn0KFEdwAVWTSp0qUrjq5kCjWq1A1OPU69ijUrg6oBtHr9KpUr2LFkh4otizYtzLNq27rlyPat3LlT4tK9izeJ3bx8+/7Y6zew4BqABxs+3LQq4sWMUxRuDDkyhseSK1uOqfiy5s0NKHP+HNkz6NGIRZM+Hdg06tV4VbN+/dY17NloZdO+/dU27t1XdfP+zdQ38OFEhRM/zrMAACH5BAWMABUALAwAjAB+BIAAAAb/QIBwSCwaj8ikcslsOp/QqHRKrVqv2Kx2y+16v+CweEwum8/otHrNbrvf8Lh8Tq/b7/i8fs/v+/+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb60Jy8vfTDCxi8nhSc1NsI0M1E1x8IoRsXTyWc0MtmfwTQUgDMv0FnS2EvL2zI2NeFPMciczd1I4zBg58fVf/rG/ESu7UMUbFgZGOva1WtTzF2ub5bUsXu2ME5BK/fILJvBUYa8MidgVKxScGK7KDA4dvxIZOPKkWAKlrMSEmaYlFvGsSRGTktK/5XUlJSMQdRGtJ2Y6C3J+OXnS3EqzwEc4lKnzT8XycR7QaPoVW1BGZUsotNhF4hcajLBaUbnNG51slJhCvbrlmD4tMQziyVskm127wLGgjcNV8HgxPU86/dIw3dT4gV+pFQJXTGNA2U2MviQ3DAFJ6N53KhkvYRf0N5drKTzGJ15hbCl81lKsdhmXL9m3Rcpyc1FdP+5jUY1p8tbgA+RhluK8EuVkyBPrbxPdSHPBdVuevgXkm+8ufJenTjn+CMep8a8Pmc7lGKi9frOd75K+uTziWTnM12M8U399aYeWfXBkx8l0dlTYFrs6cHefoC41wVp3hmBV3cANLQgTRj6tP+hEA1iwRwTIW0lA19CFIbQRM0BIBFXM4wkUQwiNfYiRUkEY9ONNbToXH43zlREkKet85aPQjHTzFsfUTjEfegdiJ1vIw4Zon7T+NgdUTBOxSWTzRFpDzNc0QjZDEZOE4MUJf2TI5oTxWghnCfGZ40+ZgbU04p1OsYljt9thmaaA0EhGQV0CkkVmewoOgSfjdoZEJ5yPomUky4mSkGCiK4T43R85knVnluJGuWAULbUIRRikpUQkpYOGByQdCo0KwrLsCgrpKYuitoRM9ZokBcIEWqMoqG26NaWzoRDVK7gQNuNQG5aiU+ycy4ZY2FsMgooq+RINiWmtVa6HDIvogj/Ua59DsnrQl8e+aiJb01Vokftjnptqbg1uGRX2qqqJFC8DWXiQm5xqZOezhgJk46BNoyviPMJFC/CEnvEV1H4FtUriR43XBQ/mAKQqqpSmuzbfxmu2sRPW2lpQ0MpaexrqR7jZrHDnBmzJEvMfJlzZCvlR23AKJv0oXQZ83bPXjxjWSa+DwsacjteHRVqswwrDVfXr87VdL+Xdmjw1N2c8G+pBSdUKmTfxEMwrFY7GuBSPuOLm9pNQ2Zlytmd3RjUPcKJIgpuL7mQ4MhmvHBTVw9t6dQ2crRXMVgnFg/W4v25aNHATj33rJTfzXTfUXxDHOI2GNf0Tk8/Q2faMFI+/97aSGMntOQZclz7s1IDLMzeov
OZl4RD1hPaqB1e47dIDKMcm0BknbagTePwdYKkPcME/bmKbrP3SKZHcfKj550f6xLZcVql2Fr6tTyBsKLw/bikp+1jyVYAJ1PayhveopYWJeU9j2su2saawDctl62PafVjwttS5JHm3A9RbdsJ90CkQb+dz0k2o6BfNpcNdpHNXO8bC/rys5ntqG8tAnyUl3gjjQWein1G01+HtmI94rWuJX+LDd+EGMOWXakK/GtZDLrxuL+dSD174aDmVtZCBBrxTBjCIAGt0cPUHaY75DDOBTHHMCFlj3lAs1EAqfQh5LlojbRrXlBY5gS6zP/vgX9JY/jYc0TweQFCf5tVYMoHhReW7IUUeFDFMigrA8WvOe87VwQDybxBOtA+izwc/axUnifQkWFwFKHfACCuG0LQCiTUkw0jpiqjZOGTeJQNhsgISlFKD32r5OA77qjL0M1HGgHMpROaUT9aVtKXjYwlDrsRxVbqyVGbdCb+pEYG/vGSlPWxoh+xmRdgWqiKSIHSNSv4yj5CpBkt296HXNNEW7qoiL20zHnK50bHhCec8kgiDImIFEBisxuZcd4S+ijLTspHNIO7ZDSzYMj0sRBwDwXI6qoQSXdu85nmG5YS66jQKQAHkcpU2ShBtqEzouwdE0UfNBMppYoepXH/BjUlByc5zA19EEMVnd8hWVNKLJUwM4HznxxpSslTLlSarUmZTy9qUdUpYTNhCShBbVOfktERnufKhjf/+U0ZIFOQBe0ZUZlgzsOMg2+4WqceizrOlKVUlhGspzXuCUV5wNJV+DrWqFy51JuhI57Mc5exqHCOJW5hPzeqlgjDKM+xkjWT3ftqUr3HU6XiLX4Ys9uGEusXQoYVPyO5TpuYJKmSXJCp8fTHWzTJ0mRyE5Xj+Q9nNfrOLl3BtKEtmw3FF9mNLjSvYBJhb4s6XJDmqKNcxatk8/gwOhXqtWBd4TMGNNq3ZCMzcsUC/6bjHuCUcqtbRap4p6TVeW5RsMCl/y2JwHhG48y2gY4Ky3Z6Kht6YbVlcb0OpPQqXLDKlV2FWxERY9pT06ZkRT9V72euQaMDTzVF/DLHImHkYAWHirKOXaY9D4dIRVbNhg/GEmaPCtfTNbhmGnWpgoTZPypKSSY1iLGMY7zBC5MYBWEh3IxjjKTrqNhAi5MjhVFsz+FtcIWM7SsuQ1qZ6dDFGTuW8U8JHFEFleOuTGOxkpcsUyQEdcgIZjJSXhQmKEfZIdvJ7hW2m80xe/cj4HVx6KhcXs1m2IgnFqgnwShA14GZnQ7dpcsK/KqfmDe/QCq0gPfaW0WiaKIs6ykxo9tatmYmxCkqC8Uq0tYcxcyed2buhv+NIFSI2mQwb41Mm0eZ6gB1GrVZ5gI4R3pSyDVmOjk+L2CP8OOaBrmTl1bvvDA97K/t2rfJ7auTWWPcd9J5uZQ86xSw/BxrejhKkFwrpfHK19o+dpRqxkh9uAvVX8L53Jyu4rORrVIvGvvY6WCWXZk9nvDi+iOR1na7b0xcAj2artFtpqgx2t++5hugbh5WqoltUcL6ZuHCriW/qdDQXF4zpNveZHgpuuobBwjiquSoliku54FaVmyrvDdKdV1pozr81/sOrDxH/tItIzunYSkZb6d53HX3G6zNZmVj57rSa3NyuAJvuFiPOdmfE6uqLrPqm8tL9a7Oubcl8yzBZb7/5wVuhIJoHrS+3XlwZS4b0Y08nx3FnuCrBHvAvZXtCOud8/Ho+Qr+lODD7c5HNq7Uo/P5z93H2+Wj54vjI946rEscPabHu9sC4rVa/04YOcYX3330Mcszrpq3Dt7q5ezkzi1lQ0zpmZcy4XLPh0t4gXEL8MV0WYdN/U2Lzx3y+pSlGSdPuu/MRtysBWrHexvndHsV9AZ/O0r4HvHV1x7sf4OvpX2u/Ik7vgjVdzbSbYur+43ehIye1fHkpzjHU8+i549GKGFr/AbKb4yb+TwmZVRE00C78PTbvOQTH/MV0v+HspRQAJgixhdqSFBq5r
I90vM8d6YWTKVy5zKAmfJ//67Va01AX9qHforlgFOiSSSSbXyRPUzkDMzDT2m0MfCkQMozE2WHfyJ2ckU2PliCgiNndLW2WMxUf+QkQy0xaSXIgOanWNSkXQUigv7ngt+FbshHeJK2Q/f1eO7XfIHCYqpBfnN3eYLmc9/3aXj1FT64dBCmgz7HOvISgJzzDACnZGWhMBu3hsSUKokTLlKYBCLjKdMGMx3DETlIOcZUW5zzhIhThy1GWaVjb3g4ZLt0iLJzWrUFg+5SNO2gh2bXZlPTbXHYhwmEOnNVJhj4gUVDFJKIZO1AgkkjPJvHYBejeLuGJwfzKIqoEiNlgXoHc5PzJX2Iiq1IVZxINaUYM/9m4zjhdTajuDdxmEVs54qfCIubGFNH4Tg9qImy8YqhGBIEs4hRyIZ6lDEoUlhCox6C4zQZg4mWQnlPoE84w4uu6C08pmxKmCHVCIpwt224Ix53dolB13qdB4xjF0/5Nj0iUzvxs4u+ZBL+SDloGI821yoQ1gzT9XotWF8akwzwMSQTpBs2Fg9TgGJlUmNMwl9m6BAYOSn/Yi5Fxn2Ahz1hI21T0pHnxpI+knTu1pFhVzYrBSkwZSshyW2RSGu90zE8eTodGSblQj44w4gfuDuZ9XMKiU0sWRGyqGHjdZEadS/0uEFUGSlzojfqZHGFhQ8qmWkB83rzMpK0o4Us6WL/BtgS6uiUYUM6HVleLimSfTKR5BUs2ZKHjcQrjFiRwnZxGVlSOLMQLCl67RhnTVlwWYVXdZJqR4mVOekEdCRGE0SX7MaPx+gr7AA9RZgQf4cnK8iQmomQiVkhpGl9pXmaqIkFnJIUjqgGWKYKWpeasjmbgOdatHmbuNl60NGaaRCbpvCUuRmcwsl4w1mcublxmtCJdKCcrnCPxvmcp+mX0DmdvvCak8Cc1HmBK4hc2dmdukBm3hmeuaCRR6YI2CmeUdIouYie7DmeetOe8AkLZVGe5smb3nmVHxOf+rmf/Nmf/vmfABqgAjqgBFqgBnqgCJqgCrqgDNqgDvqgEBqh/xI6oRRaoRZ6oRiaoRq6oRzaoR76oSAaoiI6oiRaoiZ6oiiaoiq6ovPHBqvZdT+pBdbJojRaoza6oKm3Bi/KUfYJLjR3o0AapEL6n3kHBjvaBDPKIT86pEzapE7aoUfaHtz5pFRapVbKoFEqB0l6pVzapV7qDWvplk9ILX81KbpylxSRpV31XH7SMfsTYa7HJyTpV4bVNfJmUJoill+6p3zapwfRNI9WND5SFZZjI26jTcJYpGQBOt/xM/9CkdoIN5W4nov1JULiEr8TNMHji2npp576qaAqYi2BPVs0eh24SykUQo1YnsrBkAA0KyjEGvaXKb94TwEJTTWEqoAYqv+82qu+KlKOdIF8Z3tNsiryV0iLBHlzRayCdjuVVWbMqE1Kt4O/Wq3W+qkwOXR4M4bkxXNKl1Hko1ZOiXk0+a3ltj9i16nXuq7smqIAZhey2F1n2U2BNoguKDVBiZifVV1l+q2ViV/tGrACi6JqYwMGa7AeaGMud4DzYWZRlg2rqaZP1bAp47A7NmXLKkJnRmNO96+MObAgG7IYSqYdBX5WNnMM64W+4Zwpe68YR3gUckTAcXYiW7M2q6LHCl2j5mVqVWY9Cm86y7NoF00tt16QZZo3m7RKi6HK8WNulHuVmbNOMLPIBbVtFRvACbRXu7Rc27URyoFGtLDXFyV1mmn/JYhGrPpQjzYrZZspzDOA4yCBRhYQ/Me2WKR/Xpu3eoue37g3ishje+gxAKM8xdhtwoicnqiOobhYE2Go/2iMZGKKv2Y7MyGNYTKPcrO3mru5AMorc8qvHmksQgiRVQmpXfKVPJqvauk2b0qW+mI/nuI9RYlwTMKMtRISeMu5uru7vMsQudu7wBu8wssFWTu8xnu8yHuSybu8zNu80rGdS+q80ju9uluH6Ei92Ju9u4uf6qq93vu94Bu+4ju+5Fu+5nu+6Ju+6ru+7N
u+7vu+8Bu/8ju/9Fu/9nu/+Ju/+ru//Nu//vu/ABzAAjzABFzABnzACJzACrzADNzADvzA/xAcwRI8wRRcwRZ8wRicwRq8wRzcwR78wSAcwiI8wiRcwiZ8wiicwiq8wizcwi78wjAcwzI8wzRcwzZ8wzicwzq8wzzcwz78w0AcxEI8xERcxEZ8xEicxEq8xExMogHwxFAcxVHcxFQ8wFJ8xVBcxVr8v1iMxVv8xfvbxVcMxmR8v2IsxWWcxvN7xlOsxm7svmycxW88x107AAHQAGcQx09Mx3zMoAXgAAJQAI1gx3hMIgKQABFgBXocAH3cyAhqAHfsCITsBAQQAAegyHrsyJo8oAoQAAPwCJPsBAkQyVSwyJt8yv/5AKQ8yKvMBJVsAFVgyqg8y/H5x4GcBApwAP+jfMgHoABH0ACqfMgIIMhDossC8MQHQMxDcABRDMsNYMeHrMwuggDQnAC9TASTbMzWLM0t8cTc/ASyTMviLJ6vjMtQPAC7DMtFsABPnAAQAMgOIM22fMjv/MS+PAS5rM0NgMzvTAT7fM7IjM0BUMn03M48qcqFLAXhPM4MTZ2QTABJMMoQTRXczMwQoMwI4MkTQAQEIM0EDSztfMkWgszK3ADSDM0PQMydPNBIkNEirdCZ3NAyDZ2A/M1DcMw2TRXevM6tfASj/M3zLAFHwMzqHNF7zNEB8ABI0MmIPAULPdNQnZs7jQSqvAA5DQAubQRZrQTsfM/d3NM3DdZFYMf/Ly0E+/zJIC0ApRzTUd3WtGnLG/0dKG3TkCzGRT3N0BzFXq3TAWDTtnzVQhDK+KzRaR3XUfDUbp3YpAnXS/DM7bzXAPDOA7AAlF3ZlI0Ag+3JB4AAnD3KkO0iUz0koY0Egm3WhH0Eow3ObK3YrO0djM0EClDPEy0ED80EgDzbtA3Wf53WgB3ZPc3U33HUMB3HrV3c3pHaSvDRQ7DVOXLaQ1DaKYLcQmDPSwDdwF3YTr3axr3duIDTnhTau23OaM3Xnx3eRsDOZe3Tv+3c/szeqk3c3B3ft6DKuN0SHY3UDsDTCbDXBcAAxGzLXr3Sui3dALDSw0wVCe3bCV7g7i0Ersx92Not3xL+Cg/+1e5cz2Bd1w4AARiuzMzsyfV8AJDs1QQg4vU82Qyw4FjdzE+s1M+93k193mLdBIg94TauCuZt3w+wy9u81MbMywvu2BuOxxnt1WIc4y3x49a819bd4DkOBTV+41JuCu+s4ptQzmsN31O+5adw1qHw07Ec4Vw+5qAw4p+A5VnOxmS+5qLw5Jjw2ml+xmw+50Ia5XR+5ydq53i+5yKq53z+54EQBAAh+QQFDQALACyOAOwAHAAgAAAFiCAgjiQZnGialuyovmjbwrDM0q9d4qpu8jGfCBgUEk+AwmvAKLiOyVNiKkAphlClgFR4IAHHQHS7CzTA2S/JEECgiWPWof0GxksQevg+0jr3Wn0NDgEHWHBaLwYRh3ZaUwMPB1dPiGozaWQygJc3mTqcmpiWop5wQnU8qKk4q3uor0ZQsLOyRCEAIfkEBQ0ACAAsnADsAB0AIAAABYMgII4kGZxoqpbsqL5p28K0zNKwXeKvbvIxnwgYFBJRwuExkAQsmclnU0oILG4BhXMJqF532e2xa8VqqWXwmes1i4nk7289bqvfwLgbDSkVTnRwDScSJFVhUn8BBCMKAoB4PCIHKBAJVg+IXCIEA48DjAyaY1GbRqY+UqWkp6ypqDpPIQAh+QQFBgAGACyqAOwAHQAgAAAFlyAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBgcBBQ64klkRNqUAeYxqZQ6Z1VAk0q0KhYOwYAgGWaNjBMkoS4DoNLAYSIqCALk9zmQcIsIS3pdWngldgEFgkBWO1NwhFcjbEiPWz+UewiGJ4mVfCUNn4o8cQxuh3meCGpGcm6eAF93Y36PQqM4tyK2Qrw+vlyLusBPSiEAIfkEBQ0ADQAsuQDsABwAIAAABHYQyE
lpuDjnyqf+WNeBoMiRn1mhmmqxoSvBIfEpKx0AyuEfggDuReMkhLli5Tj06IxIIgzanD2XUaeSwkxOsVWAbgf2sqhmFFp6LrPVbu2Xm7VuJ903iUdg+IIDBgcIBWJPDCwEhncqYzJ2cy6Oj5MylZJXljoRACH5BAUGAAMALMcA7AAdACAAAAWAICCOJBmcaKqW7Ki+advCtMzSsF3ir27yMZ8IGCQZBIQd8VQqnBLKJeuY/EmFgCUTqw2IjilDlChqHA4PR0BsJZcI6zGQBWe7tPR4e/7W368kdXI8eXZDeH2GWYiBfoeAI4J7hBMEDGhqAg9nSV2PNZ5cjD6hQqWkozqnqqk2XSEAIfkEBQ0AAwAs1QDsAB0AIAAABYIgII5kGZxompYsqb5oK8Ow3NKvzeKqbvIx3wgYFAKIJ6MIGVAekU5mFEpqLBwnCIGypIogtS5xZDg1JKLCWQxMnwo2KYAQgOjkh8DhTs0j+GMAfoBtc3WEPG4BcDNeYAkKVYxyAAUJKJdvbIkkBI8OWpNeQpSkoz6lqKeIOFOBRkwhACH5BAUNAAIALOQA7AAdACAAAAJFhI8Yy+0Jj5stRootxFQnPnkKWIkGWZooY55r0AIv3M6xXb+3nq+730P9hEHS0FgEHZVJztLZzEQ701D1cU2JcCped1UAACH5BAUNAAYALPIA7AAdACAAAAWRICCOZBmcaJqWLKm+aCvDsNzSr83iqm7yMd8IGBwEGjriSWRE2pQB5jGplDpnVUCTSrQWDolAglEYZo0Eh2AQFlNEUKngUQYoBAEC/OwmMQIPe11aAQclDWKCQFaHfXGEVyJ3CYo8jCSTlTiXI5kAj1uYjnyRdqODoZ2ni5CNlJ9ZQrCDso+1sUK2ubg+ur1KIQAh+QQFBgAGACwBAewAHAAgAAAFkCAgjiQZnGialuyovmjbwrDM0q9d4qpu8jGfCBgUEk/C4TEJOAaYTuhyRBgIBAPCxDUtJE4DyHVAURJF30NBVFgEDmYggBCASEiF6zoqbrAeAX5RVzhag3ULiYqJgkt6OnwBWjZRdGSUU18PayMKDHE8bF91EFZITVMiDYBgDAqgOFJnSVG0qT61RreQu5hEIQAh+QQFBgAMACwPAewAHQAgAAAFlCAgjiQZnGiqluyovmnbwrTM0rBd4q9u8jGfCBgUElHC4TGQBCyZyWdTKiocBoLToeBaVlEDSDagUB5HBC76ZCbqEgEuVbYgO71VBFZVpiqyCQcIg3B9eA4BBCV1hmcFAQksWI1ufwMlj3ZSmWUifyeUQCIHJ2GIBwaaeBINYg4QDQAIqmdRqz5zuLc6uby7Nr3ASyEAIfkEBRMAAgAsHQHsAB0AIAAABXwgII5kGZxompYsqb5oK8Ow3NKvzeKqbvIx3wgYNOR+RBHhwDQGIkggq3CCuojPEjV7TWqrUd4U3JV+uUPsGC3CsgFba9pLiodxa3lbfdYD3Hl3NIFlYn2CNYeFeAANCEwQJw9MBAV/fAM8loBCe3RCnJ2hoHyipaSfPm4hACH5BAUGAAgALCwB7AAdACAAAAWEICCOJBmcaKqW7Ki+advCtMzSsF3ir27yMZ8IGBQSUSVCY3YMkAoBCPP4jE6JpIb11gQcBgKYFNB0GiDoExp9GHZH2vHuLYpfgdntnArX//h1fi50AHZcgIWCbogKJ3c8VQFLe1gkB2oPYAVkhHUQYQ4Qm2VCi5VGnTakpauoiDqtPmUhACH5BAUNAAkALDoB7AAdACAAAAWRICCOZBmcaJqWLKm+aCvDsNzSr83iqm7yMd8IGDwVdMSTyIhMLgNHWzLwjM6cEaYUaywYHIIBQTLkBhonCDhgKBOfAsRERDgp4MDnozQIyAFTTw0lB2x4PFUlCIaAZlZ0jIFZUIqRjpUHhziJJIuZjW+TjwCemjScI6WgeaKYpjVCrzmxq4i0krG4Qro+vE1EIQAh+QQFDQACACxJAewAHAAgAAACRISPGMvdCY+bLE
ZKLcRTJ+54CliJBlma6GKeawusASzTb1vjt7qLOY+yBXVDIEl4JCaNIGRT+WRynFNoVYqhZq1bLKUAACH5BAUGAAUALFcB7AAdACAAAAWFICCOJBmcaKqW7Ki+advCtMzSsF3ir27yMZ8IGBQSUcLhMZAELJnJZ1OqCBAihIFAAGm4loCq4eE4IAynqxNcDQwio8YJTj0pdoF7HUrSeusJLBB5a0dhboKEgIl/bIglg42GVYGQio6VfZeTj5qSRIeZI5GFoFFgp4appkaoraqvrD5PIQAh+QQFBgATACxlAewAHQAgAAAFjyAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBgUAognowgZUB6RTmYUOiowEgLHo+GiAhpZAWRwOgyphSxBIipguU+iyBBAlBSBwZJ6KuwCflJpPIFQeAELiYqKhXKDPoJ9OlIAC4iTaFkMfiMNa3FAnWEDY2V7clUIEA5lCJyURrBCspBes7a1qLG4mEQhACH5BAUUABQALHQB7AAdACAAAAWkICCOJBmcaKqW7Ki+advCtMzSsF3ir27yMZ8IGBQSUcLhMZAELJnJZ/MJCShkisDiebBiA4wnASwqMBgFEcL6zBrUJ4LIwF4WAgPRwKHVB+5LTgIAdwdVEhMngkcAewUNYGsKdw+LRABdZn9ZCJyWQABjkA5DEGNyUpBrB36SnzyEeF6hsmlSiSdpAFmKrzh+CT8QSoxRgcbFRsfKyT63zJfIRCEAIfkEBQ0AFQAsggHsAB0AIAAABZAgII5kGZxompYsqb5oK8Ow3NKvzeKqbvIx3wgYFAKIJ6MIGVAekU5mFDpqLBynAaIwpAIOqEHiROgSRYWTgrQ2A9FJm3QZ2M68CgGZ+zuPCgh6AQd8dH4lBGMCbU+HLGAPbjw2eQmSOCIKDREkBoOXNCIEWRADgoVzgBBYAlqFjW9Kc0azQrU+tzq5cl62SCEAIfkEBQ0AAgAsDADsAJMBQAAAAv+Ej6nL7Q+jnLTai7PevPsPhuJIluaJpurKtu4Lx/JM1/aN5/rO9/4PDAqHxKLxiEwql8ymUyaISqfUqvWKzWq33K73Cw6Lx+Sy+YxOq7WStfsNj8vn9Lr9nm7j9/y+/w8YKCigN2h4iJiouPhWyPgIGSk5yedIeYmZqbk5Zcn5CRoqiuc5anqKmspVqtrq+vrJCjtLW4soa5uruzuHy/sLHAzmK1xsfEx8rLycm8z8DJ3qHE1dvTltna39iL3t/R3YDT5OXidejp6eF6He7k53/i4/b/Vkf4+fr7/P3+//DzCgQH4BCho8eHCgwoUMPyB8aLChxIkUH0CEWDGjxomWFx9u/AgSYEeEIUuavDcy4cmVLI+kjNgypkwfLwvOvInTRs0AOXv6bLHzp9ChJIISPYpUg9GkTJtCWOo0qlQDUKdaTVr1qlahWbd6xdn1q9iWYceaLVn2rFqNade6ldj2rdyBcefa9Vf3rt58eff6ddL3r+AkgQcbJlL4sOIfiRc71tH4seQakSdbhlH5suYVmTd7NlEAACH5BAV+ABUALAwADAEvAmAAAAb/QIBwSCwaj8ikcslsOp/QqHRKrVqv2Kx2y+16v+CweEwum8/otHrNbrvf8Lh8Tq/b7/i8fs/v+/+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmYg4NzxHnDehoThHOqKhOUucnppIOD2mOjmkRQqyBXo8Nztkq61rnKlFwUUFOzmxPju4VDi8ggWnp89jyDfMRAo827o3tEU73LDCSdbYv0I7qOPfQ+reubtk5uhoxMM3PdnSou1RyBOgoYolyx8YdT4kLEFmEB85JAgV1hMSihoSWznO2dFl8WA+iRN7fXRIxFQPZgUUdIwSKmAgjHsYqhoZskm0a4k41mR0j0jP/5sarQAV5EuPTCU9dyq5qUin0kRJhfS0pSPLsVj8IhTjYc2HP07LrurgERSANoI8GjIBm2UXKB8F3mqVmnUhPKQ0fdY9Yq
yrQYBnQ5Gt0jeZwcBjFeDj9cpkOwXj+NG8eSooKFEACsAaBUBdgW44NHMetjnHMiOFx36WB8WUYpJ0pR3pJhjAK1S4mBZxOqRwPq8upzQWHJpI3M351A6XdbqY2B5qxTLHFdX25JZYjiHvwZ17cOuCu4IMxjBcqNdDLlvrNEWdWpamuo1DRk3lNmtMjiapbjacD/x8oYVbSbvks4NYZTFB2SlVEfGOLLG0swosPJjHmm3dddUdK5n5t/9ZUMZsc154OL2Tgw8DdXPOdrGglxlBXV3oBG9DoFifh9gN419ygnn34g1G0EgZMrGABIVoewlB23bfAbCkNV+Jsh6HQ6y3HnV50aVPjfm4KFSOqI1mlinUyJUemADQB5ICCS6hiw/cNCdFRWOyF81DQ1CV3137ZWmEnqXYmdk4IEFJ4EoKtpinhOedeQN6oGTkKIhoIlFpEdZI6iB2HzrJpy6aSqDOQ53+KGMTgPZ2aZ1LiUKlELoRQeObc2Xm5ROgSRRXMcXBqt+PLvYKq5iiMaoDNrtah6eyxvll5BSxBsnePjiB5yUs7awqBW2nvOpEjo0CcGmq5fDpip+1XBr/1U3oAXjmllDwR6A/EcXmj2thAsmEtkLQA8609bo37LGY3mVLtQMjalew6LKaxE3L/iitRSi+d2S48U4rBLmznUpuvUZUJzJWFieKcBEYV3kXKEaAnGbDUrCplTF0wjcEuPqm26ASv34C88aX2igth2rqFbGbp+rc5FDgNdmzw0vw+7K3nbGmzjNsVe3t1en4WTQULld8Eb83NRStkjKOeqsULj/BMZm1FnMpmAc3NDK8w4hHxdmqnow21n4mhSQPa1sBSpuW7ozzERwHWrK8eV76dE89Q16uxfzhy2zBh0lNkd+cHyFw1YA/wy0/rNC6W9Imu8T0n2SvyreQ8pW8/wTXa52e5cE6yNm3ZGiaGOWyloOicOsB5iwrayMTn2nhVJhiO8qK52wK454/DVsSjb98K+Xmbt7ao3jhrTIt1T19cBNSaz/66FnPB+f8cJqeNI1PYEsX1UDvnHxZs0saZPrxrBmxbnUGOpDqjPOgwUTuN/Sbn0ZWUzOouUNj5zoa8vIVt7/FJmQNG87xpKC/8d3Mesp74J4e9zMLng+E8KpcCx1XvkC9Jn3hc2HiEOe+u8CPNUJTAv48NcI+saKE2PMfB/+3OoUFJiHbOuC7ztE9qVgDPXxbQl+w8zoPKqGKGzSC5uYVG420DR+gi14Oo1Y9lSVxhTPRYP86tpIFyv9QjkiAxQj5xjQcGiSLYkzjC1vmw5VZDYOi85oUW4UTfoHxRwCsFNweljInnLGJ28vjdQS5lKJJL3RKGNXetLXA/jEDFPdaY98Q5zbPBfKEboSdEmlYwy+qK0tlG+QUoyBKnoWvl+LTZd/Yx8mXNeR9hnzGwRC3TONUMAq6wEHWxjZLZ6axizcpovaW0MzLrc58MFwe/24HsNFQMDg7ENYEjFeF+BTwR9TIZWwIhjb/GeMcwHyCZrBoJhN+LpZKswsL8VhFZDhQNKQKn+WQcJOT9MZY30DlLskotxTu0JfHLCT6LgQqF92TSw4d1DMzxpViPrKLBIKiqSyyAxd18x//KRPWAimIN2muCIPZxKeE2iHPCCDnFC56kqualLEe6cBYstCbluajqAfKAjnTq+hAajOnNv5TKjiCYHNUsqPfHAgbCiypVyOa1R5sQ06UQQ49+6XQGSZxqil8EHIs4scl/qms9bthV09UIXRqlC4US1EskCWg3i2Sjehyxn1QcVZmZPVE6URjUisiI8FSVQpImiz5BjbZo+ZFrleS7ImU6ql8MMknmXJgb7TD2Kh+8T+o8BIFC2IkYjwnKGIRDPRUIdaxuJZ6sLzqy4C32QdJ5hkLkozKiOvRqxhII3fcm3NpWwuxEi6TbP3jqozLj2f8VBpg/at1OoIDsZ7IH7Ml/8s02SYmAjH3c5JxHiqWsd7yTum3otuO72g6mLz45rl8mW
4PfHeWZBQREAt9ioIXzOAGjyHBDo6whCdM4T7hscIYzrCGlQLhDXv4wyCORIdDTOISmxjBbj2xilfM4ha7+MUwjrGMZ0zjGtv4xjjOsY53zOMe+/jHQA6ykIdM5CIb+chITrKSl8zkJjv5yVCOspSnTOUqW/nKWM6ylrfM5S57+ctgDrOYx0zmMpv5zGhOs5rXzOY2u/nNcI6znOdM5zrb+c54zrOe98znPvv5z4AOtKAHTehCG/rQiE60ohfN6EY7+tGQjrSkJ01pdATg0pjOdKYrzWlKaPrTmO60qP8hAWpQj/rUjCj1p1HNakSoWtOtjjUhXr1pWdv6D7QO9a13HYUBBKABZ8j1pXkd5gI4QACsFISvga0gASSgg1EQdgCIDWYD/NoRy3YCAQJwACtIm9peVkAABvCIbDshAdemwrfBzeUHpLsR5m7Ctg1QhXWzO8vGRjb3DoBuZx9gbQ1wt7MRACJ+C+DSB9DIATJN7wb42tnnKAACHp6AfxMh2waveILynWwk2PveV5439zA9gH7TuwgLuHQCIHBsB0Sc5Cy/tEsZkPEGIJzl2Ti4yhF+8QBs29n9fjYS3M1sKXwc5FW2NgGSgO6l90bh48YGAqJOBAKc4+d8UXm3UYP/cGw04BwPfwAzxO1zJEx960YXNtKxfOw2HRxxHEf5u5GAbhCp/J0LPznTh131ADzgIlRPe67XfuVLt8ndC2jT2Y2weCWk/Fb5LnoRDr5bAPga7RsbN0P5LnhaE77KcWdo2MtibVXrPTMTP3amIW/4rAeAlfHOvNC5Pu0pHP3zTw79yFXuJZYPYAHADz7wEZCng1ccAchHN+tf73qiFiH2Zgm8EVrf+Vfjfsq6/2LMnS4EpTPh2Nzv/txhRf1ilJ/u4xf37M1f++qr+vpTPv8SsD6Exj9M+kKAfmbkPwSZLwH66rd5AqBuagd/UfZ2+lR+2fcnmmcc/sd+CZJymHcE1ACIf8VHbrZXgAb4ZO4WfsZhdX3nAHKXAB7FACjxgNGHgr3Bf5nnc8gieRW4fn03gU9wexuoZPbHfisXc+MXAaXnABDAg9iwcOMWcwdgbehBAEcYc7/HAJInBD+3c37Xc08YgBTYg05ggzeIZAtYfw8QdAl3EQbnb1XIhMA2deihajIIK2NYcS4Sg81HgIO3hU7Gck/4CyInh55Hh01mcxg4EXVXbxrIh0uGhBORh3pofYTIZF14CfkGbVCghYs4idM3iJR4iZU4h5i4iUQgiZxoY0EAACH5BAUNAAMALI4ATAEcACAAAAVpICCOJBmcaJqW7Ki+aNvCsMzSr13iqm7yMZ8IGBQST4ACLjE8JlGOqBTSJD4FumPgmnUqsTYtN+xFkq3fLhpHqAKv0njDzRvLxOnzO38vg/trfzN+antmgG9CAGKKjEZOjZCPVpGUk0AhACH5BAUNAAwALJwATAEdACAAAAR9EMhJabg468qn/lnXgaTIkaBZoZ9qsaErwfFUDI6y0lfVXIcdr3IT6F5DGYDXUzIDMw1BSJMQDoaHIDBFVivarofJCVNhZe6ZlRZHkxSzFw1Wz9l195KcX6PafiUFCFhaAQkLBwgNe0MKLAuNXzJPSpJ0lHyZcC6VTpqdTBEAIfkEBQYABgAsqgBMAR0AIAAABYkgII5kGZxompYsqb5oK8Ow3NKvzeKqbvIx3wgYFAKIJ6MIGVAekU5mFAoYCCSFQ+J0KAypVkLggdAml+BTgxRGE0VWSEksf76rAURJEUi4gXABayR8fnaAeIMjhX88gYoijIeOiXt9jTiPloZSlYSXk5mei6CdbZ+cVEadq6pCrK+uPrCzsjpMIQAh+QQFBgATACy5AEwBHAAgAAAFlCAgjiQZnGialuyovmjbwrDM0q9d4qpu8jGfCBhsFU5DYsB2XAKUTiPyqWROoVYnVtR4OASDRjNJFDFQkO/hWlWcCCNCELsIHEoGdlkQUJQaekBNBSVjVESGJIZQg4WBPBJ8hCRuWl
V1DHiPOACVDSOAmzRmJwleAQh8ZECgaQFhAAOWZUKHrLVbQrk+uzq9Nr8yUCEAIfkEBSEAAgAsxwBMAR0AIAAAAkWEjxjL7QmPmy1Gii3EVCc+eQpYiQZZmihjnmvQAi/czrFdv7eer7vfQ/2EQdLQWAQdlUnO0tnMRDvTUPVxTYlwKl53VQAAIfkEBTUAEwAs1QBMASwAIAAABacgII5kaZ5AoK5si76w2M5sbJt0fu9yPvM73w9oE7qIMWMNCVOumE1nAPqSTqknKzYrHRUQEIFqoUBqe4JEWIUgngGN8kghLgDfp0Ogce++CAEEfU4kBA8OLYI8ZwUJYwiQh4pBfgYBBxGFgYNKPXaakzdnKpkkiKFFfo6TjWycRiINKxCOA4CoSX5wC4i2EXS4UYRbJXjEKbrHyMPKy53NzrDQxsRWIQAh+QQFDQAIACzyAEwBHQAgAAAFkSAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBgUAognowgZUB6RTmYUOlIcEgLB4KAYUgEN1AAbMHiJIyxBMioUzkDve/Z9lOe7b2FwOuDhPCUNZBBdgDgtChAna0tfLARJjmgyBSdzUgAFBH+RDoc0IpYBCRCLJw2gNSIED4R+Lo8+mUa0Qrazsjq4u7o2TCEAIfkEBQ0ABgAsAQFMARwAIAAABYIgII4kGZxompbsqL5o28KwzNKvXeKqbvIxnwgYFBJPpAJBMTuSGgFEk0giRKdA0uF6OxYgCZoUcAx8wY4TGEwYOkfb8e4timN5Wu6cCtf/+HV+LnQAdl2AhYJuiAwBB3c4T0iHWSQDag+Xi5VVlyePZIQ6ZUKbeKWkqKI2qUarMmUhACH5BAUNAAYALA8BTAEdACAAAAWLICCOJBmcaKqW7Ki+advCtMzSsF3ir27yMZ8IGBQSUcLhMZAELJnJZ1M6akAcggGi4FqOroEBWKBQHkWHMBd94koL7dIjgHB6CYEHq6GWMugscEx+gCWCFIR1hicShHolfAl2Z4dygFRpA2sTCHFUEWNjZZNEJFYCYQebnz6sOq42sDKyM15RtkZLIQAh+QQFDQAPACwdAUwBHQAgAAAFhSAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBgUAognowgZUB6RTmYUCkikCBKEVbB4EkWEg2EQYBwCg/HBCywpUI3fl/Qu76ijeuE+zyf5bXRoN3gibwmEfYaDgDxujHKBfoiNOAAFCGJbEAcICkNQdTBrS4U+UkqoRqpCrKemOq6xSCEAIfkEBRQAAwAsLAFMAR0AIAAABaEgII4kGZxoqpbsqL5p28K0zNKwXeKvbvIxnwgYFBJRwuExkAQsmclnU+oIFFiKQMK5BCACCNYBzD0CCtrJzlomihaBBikLUZoBdNJY3ga62EoUdm4iBGR4AQZ/d2dpAHuLhG9xElUSkX4jWQZZDD+MIgkCY1eYPCWGjqY4N4erNSWipa85c4mtoI+Ua6ANqp9uCAYQJwozXWMCEMbHuTZPIQAh+QQFIQACACw6AUwBHQAgAAACRYSPGcvdCZGbLFpKbcRTQ+54CliJB1maALqoBhu4KyvDNe3aOa7qPW/yBYEiYZHoMSaRGmWTeYFupB/qCHXD7rQ/7hBVAAAh+QQFBgAVACxJAUwBHAAgAAAFliAgjiQZnGialuyovmjbwrDM0q9d4qpu8jGfCBgUEk/C4TEJOAaYTuhS1Fg4ThDCxDU1YCEJrERJFBUakVFhEEA3p61G4EAG2gqBR503ql5Tem9lEl55DAgIB3l7OAAEAQMFJHiBUV5ufYuCdhBtJV6VS4oGWyKKmlF4XydyoYMFB2EJB5KocD5RSblGtzq7uL02v75HIQAh+QQFDQAGACxXAUwBHQAgAAAFkyAgjiQZnGiqluyovmnbwrTM0rBd4q9u8jGfCBhTBCA6IgpgRNqUJ+YxCZU6ZdCAlarcIhICx6HgqjYHR0hYoBiaUW3RIQrISgONHb7+1pYYAQ
d8XU0sBFN2hSUNiG8Dho2EdCSAgokneT95l5MTcw4Rg0RSB2gDaidxlwQRXydjZV1CokCztDy2drO6Qrw+vlxEIQAh+QQFBgALACxlAUwBHQAgAAAFkCAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBgUAognowgZUB6RTmYUOlIYHILEoeCiAhinBAQbIAypjZOiql5SsY0SIQBxEwGFZCkf4ErTAzcBa390LQmDT3eAgoRQfBN7J35viSRzgYpAImkCa5xtmjwjYHRjJwhnd1VXqJ92m0pSsl5Cs0a3trU+ubxIIQAh+QQFDQAIACx0AUwBHQAgAAAFgyAgjiQZnGiqluyovmnbwrTM0rBd4q9u8jGfCBgUElHC4TGQBCyZyWdTSggsbgGFcwmoXnfZ7bFrxWqpZfCZ6zWLieTvbz1uq9/AuBsNKRVOdHANJxIkVWFSfwEEIwoCgHg8IgcoEAlWD4hcIgQDjwOMDJpjUZtGpj5SpaSnrKmoOk8hACH5BAUNAAgALIIBTAEdACAAAAWGICCOZBmcaJqWLKm+aCvDsNzSr83iqm7yMd8IGBQCiKdSgyC5IUuDwMRJ/E13z1EhSQUCCFFaRISUgiFhtPpaJm2lXZ6bi61q6T+76H3Ne+9wdX97eC5ZhIF+coAFcThQAQdNio8kDShoDg1khyMKBg4nAwqcekJtRkedPqhGraerOq+sSCEAIfkEBQ0AFAAskQFMASsAIAAABbMgII5kaZ5Bqq7s6b4kK69wjc6zrY94vus92Q8YpA1rRePxlVQtYc3UkxmduqIB6625gwQULkVggd0dvuEAo6wjqEUFBqMgQnzZNrGhniKIDHdVOgUBAyIDDmOHAYSCRACEB14SE1J4NogFDWp2CoQPAJc1Z3GMYginoY42bpsOIoVufqIwm3YHi52qXDufaAB2XnS0UCl0AGJSu0lDCYUxz8tFWibEVtZT2E/aS9xH3kNYIQAh+QQFDQAJACytAUwBHQAgAAAFgSAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBgkEQSGHTHAGpwKPyLruFBKhaIlEwvQio69KFBUYBwWTod1XCqc1jyWWy2Ot991nBzvWu7pfVckc3B6JQp8Q36HiVmLg41diw0IZ2kQBgcIUF4AaTScj0KdWKSjoj6mqag6qq1LIQAh+QQFBgANACy8AUwBHQAgAAAEdxDISWm4OOvKp/5Z14GkyJGgWaGfarGhK8Eh8SkrfQHK4R+CAO6l4ySEuWLlOPToAkYkkRZtzp7VJHUpdSopTC0sOx1zrYAn9Cxmkb1bcPf6nYTLbjYexSMwfEEDBgcIBWlYDCwEh3UqajJ0cS6PkJQylpNYlU8RACH5BAUhAAoALMoBTAEdACAAAAWaICCOZBmcaJqWLKm+aCvDsNzSr83iqm7yMd8IGJQdAgQR8aQ7JgHLQBOpXE6f0WuVOCogEgLHoeDcAkUFx2kwOBmoUCsAEhgU0G14Nh24d09YVgR1LGCBXEcHLG+HZ2Ulj1mJi3qCAQ+FlVx8fmiAZjwidA9+BXSaZwB8AhB0YqihaAdgYgUNsDhCQ3K6WbpxXL++vbxCw8ZLIQAh+QQFBgANACzZAUwBHAAgAAAEdhDISWm4OOfKp/5Y14GgyJGfWaGaarGhK8Eh8SkrHQDK4R+CAO5F4ySEuWLlOPTojEgiDNqcPZdRp5LCTE6xVYBuB/ayqGYUWnous9Vu7ZebtW4n3TeJR2D4ggMGBwgFYk8MLASGdypjMnZzLo6PkzKVkleWOhEAIfkEBQ0ADAAs5wFMAR0AIAAABH0QyElpuDjryqf+WdeBpMiRoFmhn2qxoSvB8VQMjrLSV9Vchx2vchPoXkMZgNdTMgMzDUFIkxAOhocgMEVWK9quh8kJU2Fl7pmVFkeTFLMXDVbP2XX3kpxfo9p+JQUIWFoBCQsHCA17QwosC41fMk9KknSUfJlwLpVOmp1MEQAh+QQFBgAEACz1AU
wBHQAgAAAFgSAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBgsKQyOU4IBIJ5YChQE4jg0nSyqYfIj7gKKr/dXEAMBhccgIahdvekpNbCWR95nspn3LXfzLgF+gWOBg0NYen98ioSAiIciTgF9eziVi5eNiIWQljRCnI8+k6GSiaGlpqqpqEKTIQAh+QQFDQAGACwEAkwBHAAgAAAFkCAgjiQZnGialuyovmjbwrDM0q9d4qpu8jGfCBgUEk/C4TEJOAaYTuhyRBgIBAPCxDUtJE4DyHVAURJF30NBVFgEDmYggBCASEiF6zoqbrAeAX5RVzhag3ULiYqJgkt6OnwBWjZRdGSUU18PayMKDHE8bF91EFZITVMiDYBgDAqgOFJnSVG0qT61RreQu5hEIQAh+QQFBgACACwMAEwBFAJAAAAC/4SPqcvtD6OctNqLs968+w+G4kiW5omm6sq27gvH8kzX9o3n+s73/g8MCofEovGITCqXzKbzCY1Kp9Sq9YrNJgXcrvcLDovH5LL5jE6r1+y2+w2Py+f0uv2Oz8Ml+r7/DxgoOEhYaHiIKMeXyNjo+AgZKTlJuRdRiZmpucnZ6Qm5+Ck6SlpqekoZirrK2ur6CuulGktba3uLGziby9vr+wu8CzxMXGzsKXysvMzcDJjsHC09TU0GXY2drV18ve39De7aHU5ebp45fq6+zk6Y3g4fLx/3Pm9/jw9Wn8/f377vL6DAbwAHGjwYrSDChQyHKWwIMWKthxIrWjxF8aLGjc6btHj8CDKkyAkBSpo8eXKkypUsW9JACdOky5k0a9rcEDPmzZ08e/LMCdOn0KFEPwJFWTSp0qVMjqZkCjWq1B1OZU69ijXriqoltXr9CpYD1wBhy5o9y2As2rVswaptCzcu1Ldy69rtSfeu3r0s8/L9CziL38CECz8ZbDix4iKIFzt+TJUr5MmUczSujDkzisuaO3v2wPmz6NEUQpM+jVqB6dSsT69uDdvz69i0K8+ujdvx7dy8C+/uDZzv7+DE6w4vjpzt8eTMyy5vDl1rAQAh+QQFhQAWACwMAGwBfgSAAAAG/0CAcEgsGo/IpHLJbDqf0Kh0Sq1ar9isdsvter/gsHhMLpvP6LR6zW673/C4fE6v2+/4vH7P7/v/gIGCg4SFhoeIiYqLjI2Oj5CRkpOUlZaXmJmam5ydnp+goaKjpKWmp6ipqqusra6vsLGys7S1tre4ubq7vL2+gR8eDZcKIh8VciEey8wRgMUlxMbIv3fFx77KIEwFIcElIiHUQxXBClnlIud/IMzM65IgJCLgJttT6fBp2mT8fxXLJLii566gMCHm0JhYpg9AwikKQBRgMoKhHHkhMgasosxduHtgoHFR4A3chxAg4VQ8WM3OymFmQKSEBNDDhyUFCH4wsdNIg/9lI7L89BD0D8mMHRs6Wmks2MwnQ4uq8Rem5s1/GytFnLhE5pFgTZmFXffQzEIPIYqUjVJRqc9pdmo6o9JxJ0G0Ar+I1NLOoAeucK6NazlHsBmAJirNe1qkIrYk+dCtBcYy0jLGUiKvoRpmMSC5ltritMlkMsLKZRYGA+wQNZR5brH+5eghZV/MWPZiyUmUWoUGuAkLV3JtlDKpA1034j33EOdRoBUrL1J8ienWMM3WJkrkehPvfqJPed7uqhfdV8oPX2/lZ2JRz1mBT8Q8UfxQ4ifNB+C+9HRzFYwA1giDDdHAPOAQOMVC9Z2mD0k6KWjgWX7Bw5s7rBUhDz0fSDj/RFsFCFiPUhCa5KER+UXxXIMAhHhWONkJMVQz3Zi0DnoA9JUhRXhxIyKM1BXUIoLqAOBZEdFdSCMSLnIYglIujihFRbiBk1cEG9Yz04wbEUlWNCWGU2CLP54UXFdE2nMlOXUFJRo5AhJEQowtGrQjYvwRyViYJ+lTkl/vZVYmSs3JOOiOQmRpjEQGClkjOOt0xExaRngDlkHw3HWZEoqq2V1BT+F5IId76tkaFXw+mQR4inpY3R
GwETFqggVeWpsEP5IARZgGmWfrNrkOoemtSISpKhEg5gplsM+lGtsSki5DaWMHKUtFk9LS6YRcWc55RAWKlhphre4AK6cUChwq/yuFmA7xZ69f/bddCTwt85i79r6IqHXCtANSWX2FlVWe9drEE08WjpDRWYhWkCZB+lQU5wg/6hNwwSLsm+JrxJIzsE4FIxeiRsLUMw9X6A31bBLqLTFjwdOSSTI0Gk3Un4Y9yvznvjPeFTN/4ND7zc9M/ES0EA3WBfOaCiNI88mntUMCxWANdqFdA3Ns8NBrQhOWCbEKmy9BIjfNMIpAlRCCwvUEOXbORh4s7cFHcwMyQWteXXBD6dgbNGAjO21yCSg3Tc/R3shtzMHwYIQgZn0HvQxrjieItggSsw13jm+vBJHfXCNxncNvF1mnCKvONmTplCscDNsnnbTrwd/QPf8tCK4TpUzstyNl+RFMFZySxMZQbZHH+YLD2cuhR9GA4ZsLQXyHFU9xN1BTpC3wTHrXM9jFl7ZeEuze1A2Z33tPWPBOjLuruO3xutU8NCntDphjhX53kJUOIgu4Mkejx8rEhij74StjH5LWlXanof9FD2lZU1HHZDSwBjwIHIiq3Zh0Aw1tNaEm4jhfx3jDGA0iCYNqmSARJvct7xmINSS8klOgkJNAUexGOSsAo5A2QyOYMIUTfB0RANicAgwQMjtsUQ+VmEOIyeqCqjMCC084MN7oY4kSUACiWmYFIpJDKSCwWNaSciXM/DB/iXrgEFUIKzYKgYxIw82RiFATwiH/Dx70a1QE9ZcSLabOLQaUXlZQmKMRgCQrgcQVad5CjzN9cI+yYsaZ5kiE8uCRkABYScwYOATVcOVCKcGiH60gNeApEF+B2hUU9yXCjvXlijmcx8+SSLqjzciRnBIj6nw4nUouUgnXqRqychjBhXgQmNViyXw2JsAmTJGOz3wjdwRpR+St6XJMgqQTnuPFJNxHmIzkoRudAA20AHKajTLPp6pZhONAk51FiCa+zCdN5ABNnc7cJc3SGByjxQueYksl51JJyS74M4347CC0xinP09nTnQRsAhdpg0teAu54/gGoEUqphPu0041WfEJBHfpRqXSTmtmLon+gJE9ZnoYr/38Dmq5ORy5tRQUd2qQgOr3pxmYOUyqee2deoAHDTUWUCxylFjxZ1MVxivBn3UwqBFWKzbfsVAur4eUApSo6ednzZpmMXjvsudI6vWc/mBTbEQHQUK7eM4EzteYS0ipXunSsRKzkZ7zqJpKakDUKAQqaCRpCVwk0NBh8/SXnzNdQw/byqHV1Aj2QwRSBKOOYFIzrp+omxEiq05IhyRlEP7VVhlL1dA2R2BpJMKaNKpYKoJVg416LTHqm0bYeXSNj3NpRNtYkteiEFBXHg5bWxs+1mqXgVS57z9n8pCgVIdpYrZrXJzCVukwYKY7eKsjkRsetWJwHa5H6wOhS0bhR8P8XYDGq3E4qZx6YFUJhh1Jd4irFpyyjrVbjtaVfRssgtt2vkQ6yFl41Q8BzPa1qh9vd88rqXQeuql0rtFEiSZK/xQoIYrUALrIhzy+Hdepk1ZqExub0dCA+bW/P8bqDMOjBw0pu1LraX3x2xFtXmJU74moaY2rIwkaV4mlTZF5yDC2vvJXCjZ+FLXfAY8F8ZEKS8eXIkYb1rzwNlTyLvLETM+nIZaUWWeujXm3cikrSTDHRhsLhE7M5u2wMGIjh6uCwEo0zFwphFnhb5MhCQVEXpqE8GzQsUMFYcnt8sxWa/A4EI2HKJHY0dwfMOxJY2tJY/mNmsTOEvpiAYhTbI37/ldDQ+EQHylMdh6dBHVQJE7cpYEuiOMMBait7h2YR5svxnnbpXu8IPAu+7gqH7Oa59frS9R1mA2pYjG2MOM1To9hJIy3p7ZLEXhVto9pqnbNRS9NCQ+M2ZkxM1T53unaxgfTssH1Nr63NePAYbZg5pUYqw5kx8r735QrEZVF7oAogQDcylWJqFhYjLZPFC3Mpfe
xLkxWsVxC2Z5swUk023NIJJCsirzrgpwQ8W1jgM8e9jKJwS9up2aRqg4zd8OysGtSJ1u+uTPJuEd33sQidN2mDZJ7cRsHHL31ISId9XCaUmuMNQjVNp0rYf7dQxdtE+YBn2dNellMmMqcNpSTe/1XMkpnjw77Tibme3tqot7i/LA9rXrXzoqeTSTeegtojyeP3Vmba6j0Cufmtxmu7Ud2AjfunZhK2jj9hP4tdaJXdmG/F75taW/d3jh+n6ZKizek1rOGpsmp4cmZ9vdEgzuftjTMZG0HpEdg4WT3q92w/urwjhzpPqe76pQtVvjhHbTydzvMrNHJNhaf26wXq9ra/vcFZADrQ2LaO6d4+hWtta/TAivopOt/P2OfYmeiKZknr8R4u3UK/Ze/9nXMeCeRe2cY+x7aJGFO0roG48I3fXhGu9aN0Amv4jc9904q93PXGORoFeJkhXBCET5x2W4eXe4nXW4u3WwFYKb61Zf/dphwkJ2XaZBpuBVbgoF5tARd2BhWjZ13FhoBt9BTbdXqxRw3ghXI6Qkqwp3Hkp3cAWHu/1XszRm/2tDHyt14IaBrehlw6N38cdIE6NxaCZE99gWFGR2xUtX/VpzpKt4RPh0YiJXUR9HvlBzShp3tasH/7N4Q4QyCj11hh2EYB9kglIUjXF3xTV3z0N2ldp2SuEVWvNXSOlT9aKGT/x3f0xINGCAVlwXZeyIXJloD0hltS13luc4iMiH19doZUGHEZqBy8kSH712I2sx1xRVSeZ4L4UIIUV3XxdWV15onIExzr9wQiJ4PoVYW7Z4PYs1qdNnpTGHOg+IlUdHNnomj/lQd9OChNH6APIeKI/VOLu/ZMAdNG93JNkLVG99Jqpvh8vriMsGhXZ7JEj1JCVldBgfgho0QmGMUbJwIccEhF3VdiKrZPX/RO5Vh7DrEQlHINd5dDmnSOx3h80rN2jbVQcRVYTRSQlaGNlEeDfQh55CBrE5WKV1COz3SDMuJE7jWMV7Js+Fh6jpeRvESRL4QEI0VkOZNHEfmNLaKQlXhO0fhMu7NLqReGu0OMJ1J/uyGK+uZaJZAS4PJkK+he47U6I2SSLAlbMVhnCwgS2wh6lfFKm7V2UlGNMRWMoVgZ5URwNvGKMTRwcIgjSqMvP+caQZMpc9NIz/ZO9LJ/6eI7/zaRESPgJ6UTMTt5GmUplrzHH2YzN4b0iifoMqWTd3RJMiexNoWilSPIS6ATZCO5NafUl2kCmNp2Wmfpl2qpS36zOeWENRGobdnxlChmaR0YktDzl/Y0GWwnWGkSleGgGlLlbrUjlXt5V3WZlmuzDiA5LZWZPtqGNcaIe8ZQmmuEmBpBjJdyKXj0mZhWKLgDmWuDbyAlbYu5lnQUnBJJJsQZm6mGkAnkmyRZm9HJH8dJJMnpQIhZAm6ZM2dBePnClTozNNRZBcIWIBmhns4JJ9MZn9Q0OB0TheOQZ6qRjtrJXqw4nyCBn87TmrJ4EuHJPcH5IuskN5Pons15fx/lm/8LR5ZgIzsSiJi/Ji/5R1vO4oz8AlyoYS0LAXc6EaAppkKtkiECijQiOpe20i7aVzSm0mz4AmKDsV2dBRWWQnMN0ykegiB+0VpDcTRM4Rdb8id9giI+mpsDFkUIkjdtMhHwZ6NxyHYbIielqASP0iFS6l1/Ig7BFybbQKP1aaTVOUwxA2jH8mhLIwVXKiUVdjjD8CaVBDM/mmL5eaJdqFtHUE6AEqcmMg5+CmBnSmeHxqUkiSUWtqZlahAzkaJukx0v4RNIuqaD6g6Z9kh7qkd+gRyXumNIMihJtKIs6iS/8ZOLCqFkOWdJSJQ6SioRQKYkqCvdkm6LCZNDw2K896n/s+im9eItdEpHSIoZnYKX7HGsyFoIypesppCozPqs0Bqt0jqtaTCW1OoJZHet2rqt3Nqt3tqnzvqth2Bu4lqu5nqu6IoL6QJDOZquiiATN2qA7jqv9Fqv9soJXmNpZ3SvhHAx3smvABuwAjuwgj
AqcgKPBBsH4FKpqpqwDvuwEBuxEjuxFFuxFnuxGJuxGruxHNuxHvuxIBuyIjuyJFuyJnuyKJuyKruyLNuyLvuyMBuzMjuzNFuzNnuzOJuzOruzPNuzLWFlPhu0Qju0RDsGQFu0SJu0Sru0Wca0Tvu0UIu0Rxu1VFu1VruyU3u1Wru1XHuu6XIpegaoizIYlJQO/2nYtWibtmr7C93jn5FzF6xBSfLooWtbt3Z7t7YQRr4UlJSWHTpkecJohXg7uIRbuMlxUbl3JIlkuIzbuI5rHEk5gp7RoI9buZZ7ucsRJwWhk5n6bSqDuaAbuqL7DDQHamXReBIYPKO7uqzbunCwh9+GL53bpMcAu657u7ibu232g5Freh+FDaiou8I7vMSrlwKFh8Hrkfc5mMXbvM4ruhDZXLDEkTISt0F0mc+bvdrruFuJFhPKRIXJljgJDlm6veZ7vnZbq4oERnZKtigarugbv/I7v/Rbv/Z7v/ibv/q7v/zbv/77vwAcwAI8wARcwAZ8wAicwAq8wAzcwA78wP8QHMESPMEUXMEWfMEYnMEavMEc3MEe/MEgHMIiPMIkXMImfMIonMIqvMIs3MIu/MIwHMMyPMM0XMM2fMM4nMM6vMM83MM+/MNAHMRCPMREXMRGfMRInMRKvMRM3MRO/MRQHMVSPMVUXMVWfMVYnMVavMVc3MVe/MVgHMZiPMZkXMZmfMZonMZqvMZs3MZu/MZwHMdybAkBUMd2fMd3PMd6/L943Md2vMeAvL9+7MeBXMj3O8h9bMiKPL+IjMeL/Mjn28h5DMmU/LyS/MeVnMlUOwABUL5acMl1rMmi7K0F4AACwKR7wMnlW8oJILhOAMoBMMqyrK0G0MmOoMpOQAD/AXAAVgDLs/zL0aoAATAAj4DLTpAAtkwFvgzMzIysD5DMjWDMTaDLBlAFy9zM2EwYpXzKSaAAB4DMAjAAB+AWDfDM4YwAd/LNAlDHB5AhB3DH1dwAnBzOMIQA85wA40wEuKzO+KwxdYzKAHDN2TzQvUDN3WzHAwDO1VwEC1DHCQABpuwAMITQEF3HD8IA/NwA7AzRsrLODs3O+hwAuhzO4NzKSPDMnmwEAk3QLI0LtUwASYDMME0O7jzMgIEANk0EBMAaI/0tDs3LKMLOgGGRQzDPD4Ay6zzTRoDTQC0FK93SUD0LpsxK65xX25whEF2KyHwnDu2h77zQMR3KOh0A/w9QLDnt1KAc1WpNC/+cBM+8AKzE1Eu9y0vQ0FBi0UiwzgPEyU0tI8PMJGKN1pe81oT9ClcNGUaNKLWMyGDdIvZsynd81wHQMG2tBNLs1yYd1LE8BU9d2J5dCodNHCXdEBAtzgtw2qi9AAhgIOuMzwjw2sgs2ZS92ZYNzZgN2JPN2Wn92bxtCqHtMhWt1ADw0kxgysI93LaNNJWNJMuNBJfNH2fN3LQdBZ3d29bdCc29BD09BHKtpdEtBM/dItk9BHhd2/n33eQQ2NS929fd3p5Q1da13L/dp39NR+Vt37l9BA3d10fw3MKc2dRR34Itye5d4JzwzMdNDjs91g7A0MbDTIwM8En3/d/JLd757RN1jM7tWNTJ/d9I0N0D3sgGPuKYAOLS/dAVXeGL7QAQkOKA8c7DXNEHUMvwQAAzXtHizADaMtIfTdYhfd4A/uPKzN4kXuSRMN86/QAl3c7Fos7hnM+yguPDgNPwgMhBjjROjs/64N/obeEAXd1GHuaJkNWhYNBDPthinuaMoNHEDApbbc1EruZybgg0/glmfuYEPud6XghIjgms7MpNAOZ7PugXK+iEfugSa+iIvugJq+iM/uhjEAQAIfkEBQYABAAsjgDMARwAIAAABXIgII4kGZxompbsqL5o28KwzNKvXeKqbvIxnwgYFBJPDeAAcAwkHVABKvpgHktJyO1Kym6J2IB2xx15yeCu+AsMj39p8xrdVr9dZdEZXpffh3kAe3hxenN8PG5siXaLOIp0jEaBNk1CgIU6lpebk5
mVRyEAIfkEBQ0AFAAsnADMAR0AIAAABZYgII4kGZxoqpbsqL5p28K0zNKwXeKvbvIxnwgYFBJRwuExkAQsmclnUzpqPByCBKLgWo4WpwQE6+A6vYBDAGKWINaT8xFQOLVF44acCHgfWH57QGk8BoI8AGMDC4yNjAiHOIQEOlKBNlJ1AXc7aAZrCiQFDFxUEZ8nEGN2kTVVBwkCWQd6rTlRaEa5Pqa8u5W/mMEyTyEAIfkEBQ0AAgAsqgDMAR0AIAAABXwgII5kGZxompYsqb5oK8Ow3NKvzeKqbvIx3wgYNOR+RBHhwDQGIkggq3CCuojPEjV7TWqrUd4U3JV+uUPsGC3CsgFba9pLiodxa3lbfdYD3Hl3NIFlYn2CNYeFeAANCEwQJw9MBAV/fAM8loBCe3RCnJ2hoHyipaSfPm4hACH5BAUNAA0ALLkAzAEcACAAAAR2EMhJabg458qn/ljXgaDIkZ9ZoZpqsaErwSHxKSsdAMrhH4IA7kXjJIS5YuU49OiMSCIM2pw9l1GnksJMTrFVgG4H9rKoZhRaei6z1W7tl5u1bifdN4lHYPiCAwYHCAViTwwsBIZ3KmMydnMujo+TMpWSV5Y6EQAh+QQFDQACACzHAMwBHQAgAAACRYSPGMvtCY+bLUaKLcRUJz55CliJBlmaKGOea9ACL9zOsV2/t56vu99D/YRB0tBYBB2VSc7S2cxEO9NQ9XFNiXAqXndVAAAh+QQFBgASACzVAMwBHQAgAAAFiyAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBgUAognowgZUB6RTmYUiggUDAJH45ptLKmBxGBxgjiwgsiTCKg61GYr4BBQrIHtwEFUffADBHc8eYGEf4FSVV6Ghol1h5COiwSAkWCTlY2XkJpsipyUiJuMoYI4eZiFpVJKrEauQrA+sjq0NrYzSCEAIfkEBQYAFAAs5ADMAR0AIAAABZYgII4kGZxoqpbsqL5p28K0zNKwXeKvbvIxnwgYFBJRwuExkAQsmclnUzpqPByCBKLgWo4WpwQE6+A6vYBDAGKWINaT8xFQOLVF44acCHgfWH57QGk8BoI8AGMDC4yNjAiHOIQEOlKBNlJ1AXc7aAZrCiQFDFxUEZ8nEGN2kTVVBwkCWQd6rTlRaEa5Pqa8u5W/mMEyTyEAIfkEBQ0AAgAs8gDMAR0AIAAAAkWEjxnL3QmRmyxaSm3EU0PueApYiQdZmgC6qAYbuCsrwzXt2jmu6j1v8gWBImGR6DEmkRplk3mBbqQf6gh1w+60P+4QVQAAIfkEBQYAFAAsAQHMARwAIAAABYwgII4kGZxompbsqL5o28KwzNKvXeKqbvIxnwgYFBJPwuExCTgGmE7oUlRgDASChKExURKp2OwV6QWKDgFDZFRQuKaGAFcWBRACiQJ9Oomn3Tt8AAoQJwlzZTwsVScLb18tBSeAdS1Xc3UIeiMKJ5t1YRCijY9mgwYJoYhNgjqVPq+urTaxtLN7kEZHIQAh+QQFEwAGACwPAcwBHQAgAAAFiyAgjiQZnGiqluyovmnbwrTM0rBd4q9u8jGfCBgUElHC4TGQBCyZyWdTOmpAHIIBouBajq6BAVigUB5FhzAXfeJKC+3SI4BwegmBB6uhljLoLHBMfoAlghSEdYYnEoR6JXwJdmeHcoBUaQNrEwhxVBFjY2WTRCRWAmEHm58+rDquNrAysjNeUbZGSyEAIfkEBQYAEgAsHQHMAR0AIAAABYsgII5kGZxompYsqb5oK8Ow3NKvzeKqbvIx3wgYFAKIJ6MIGVAekU5mFIoIFAwCR+OabSypgcRgcYI4sILIkwioOtRmK+AQUKyB7cBBVH3wAwR3PHmBhH+BUlVehoaJdYeQjosEgJFgk5WNl5CabIqclIibjKGCOHmYhaVSSqxGrkKwPrI6tDa2M0ghACH5BAUNABQALCwBzAEdACAAAAWWICCOJBmcaKqW7Ki+advCtMzSsF3ir27yMZ8IGB
QSUcLhMZAELJnJZ1M6ajwcggSi4FqOFqcEBOvgOr2AQwBiliDWk/MRUDi1ReOGnAh4H1h+e0BpPAaCPABjAwuMjYwIhziEBDpSgTZSdQF3O2gGawokBQxcVBGfJxBjdpE1VQcJAlkHeq05UWhGuT6mvLuVv5jBMk8hACH5BAUGAAgALDoBzAEdACAAAAWGICCOZBmcaJqWLKm+aCvDsNzSr83iqm7yMd8IGBQCiKdSgyC5IUuDwMRJ/E13z1EhSQUCCFFaRISUgiFhtPpaJm2lXZ6bi61q6T+76H3Ne+9wdX97eC5ZhIF+coAFcThQAQdNio8kDShoDg1khyMKBg4nAwqcekJtRkedPqhGraerOq+sSCEAIfkEBQYACAAsSQHMARwAIAAABYQgII4kGZxompbsqL5o28KwzNKvXeKqbvIxnwgYYwQOOuIJYETalAHmMamUOmVQK5WoJQwEDkPBVW1CAoPvclg+JcaAQiLwYHONAbhIsc4aByUFJ2N+Uzt5AIVXZIRti2yNd4Y/kUBah5U8l5SJjjeIip+ZOEJ2lqVZqFWqXKynQqmwSiEAIfkEBQ0ADAAsVwHMAR0AIAAABH0QyElpuDjryqf+WdeBpMiRoFmhn2qxoSvB8VQMjrLSV9Vchx2vchPoXkMZgNdTMgMzDUFIkxAOhocgMEVWK9quh8kJU2Fl7pmVFkeTFLMXDVbP2XX3kpxfo9p+JQUIWFoBCQsHCA17QwosC41fMk9KknSUfJlwLpVOmp1MEQAh+QQFQgAGACxlAcwBHQAgAAAFiSAgjmQZnGialiypvmgrw7Dc0q/N4qpu8jHfCBgUAognowgZUB6RTmYUChgIJIVD4nQoDKlWQuCB0CaX4FODFEYTRVZISSx/vqsBREkRSLiBcAFrJHx+doB4gyOFfzyBiiKMh46Je32NOI+WhlKVhJeTmZ6LoJ1tn5xURp2rqkKsr64+sLOyOkwhACH5BAUUABQALHQBzAEdACAAAAWoICCOJBmcaKqW7Ki+advCtMzSsF3ir27yMZ8IGBQSUcLhMZAELJnJZ1MqKhwcgQQiwggonEuRQnCCDLIQL/ioPEyqidNXSshKSGO11BDY7vRhcQ0saXOBaiV8hmyChIBsC32Oi0QAdQl3IwVya5WbAW5wnFR5AmZoj5VVV1luhZ1AOmcFsDw6ZLRULXl3UgV+mnEMSmylEAtndsSqCgfJDg8EmbU4U0shACH5BAVWAAIALIIBzAEdACAAAAJFhI8Zy90JkZssWkptxFND7ngKWIkHWZoAuqgGG7grK8M17do5ruo9b/IFgSJhkegxJpEaZZN5gW6kH+oIdcPutD/uEFUAACH5BAUNABMALJEBzAEcACAAAAWUICCOJBmcaJqW7Ki+aNvCsMzSr13iqm7yMZ8IGGwVTkNiwHZcApROI/KpZE6hVidW1Hg4BINGM0kUMVCQ7+FaVZwII0IQuwgcSgZ2WRBQlBp6QE0FJWNURIYkhlCDhYE8EnyEJG5aVXUMeI84AJUNI4CbNGYnCV4BCHxkQKBpAWEAA5ZlQoestVtCuT67Or02vzJQIQAh+QQFBgAMACyfAcwBHQAgAAAEfRDISWm4OOvKp/5Z14GkyJGgWaGfarGhK8HxVAyOstJX1VyHHa9yE+heQxmA11MyAzMNQUiTEA6GhyAwRVYr2q6HyQlTYWXumZUWR5MUsxcNVs/ZdfeSnF+j2n4lBQhYWgEJCwcIDXtDCiwLjV8yT0qSdJR8mXAulU6anUwRACH5BAU7AAgALK0BzAEdACAAAAWRICCOZBmcaJqWLKm+aCvDsNzSr83iqm7yMd8IGBQCiCejCBlQHpFOZhQ6UhwSAsHgoBhSAQ3UABsweIkjLEEyKhTOQO979n2U57tvYXA64OE8JQ1kEF2AOC0KECdrS18sBEmOaDIFJ3NSAAUEf5EOhzQilgEJEIsnDaA1IgQPhH4ujz6ZRrRCtrOyOri7ujZMIQAh+QQFBg
AIACy8AcwBHQAgAAAFhiAgjiQZnGiqluyovmnbwrTM0rBd4q9u8jGfCBhjBA46IgpgRNqUJ+YxCZU6ZdCAlarcEgYCh6HgqjYhgQE4OjSfEmRAIRF4tLvGQFykYGelAyUFJ2R/TTd6AIZTO4mLV2WFbpBtkniMP5ZEW42aQJyZipOInjxCd5unf6pVrF2uqUKrslAhACH5BAUNAAYALMoBzAEdACAAAAWWICCOZBmcaJqWLKm+aCvDsNzSr83iqm7yMd8IGBQCiCejCBlQHpFOZhQ6akAcggGi4KJGroEBWKAYUg9hrgiRXEILJ/XoEUC4iQBC4MFqpJ94DHUscE2AQABoBIRtUoqMhlJ6fCV+CXeIhXIidAyYPCJoA3JsAmpSnCdiAidlnzgkVqwJB5uoRrdCuT67Or02vzNeukghACH5BAUGAAQALNkBzAEcACAAAAWBICCOJBmcaJqW7Ki+aNvCsMzSr13iqm7yMZ8IGBQST8LhMQk4BphO6JIRKBQYCYHAUHBNAwjHgIGAnBRKIoAaOEhGhwAkDVyfuqMCsvmd76p8alRuf11RgzeAh22Jhl+EP46CjIWBdYiVi5BekpeUkZY8UmpJUaVLp6RGqKuqPk4hACH5BAUGABQALOcBzAEdACAAAAWWICCOJBmcaKqW7Ki+advCtMzSsF3ir27yMZ8IGBQSUcLhMZAELJnJZ1M6ajwcggSi4FqOFqcEBOvgOr2AQwBiliDWk/MRUDi1ReOGnAh4H1h+e0BpPAaCPABjAwuMjYwIhziEBDpSgTZSdQF3O2gGawokBQxcVBGfJxBjdpE1VQcJAlkHeq05UWhGuT6mvLuVv5jBMk8hACH5BAUNAAgALPUBzAEdACAAAAWRICCOZBmcaJqWLKm+aCvDsNzSr83iqm7yMd8IGBQCiCejCBlQHpFOZhQ6UhwSAsHgoBhSAQ3UABsweIkjLEEyKhTOQO979n2U57tvYXA64OE8JQ1kEF2AOC0KECdrS18sBEmOaDIFJ3NSAAUEf5EOhzQilgEJEIsnDaA1IgQPhH4ujz6ZRrRCtrOyOri7ujZMIQAh+QQFSQAGACwEAswBHAAgAAAFgiAgjiQZnGialuyovmjbwrDM0q9d4qpu8jGfCBgUEk+kAkExO5IaAUSTSCJEp0DS4Xo7FiAJmhRwDHzBjhMYTBg6R9vx7i2KY3la7pwK1//4dX4udAB2XYCFgm6IDAEHdzhPSIdZJANqD5eLlVWXJ49khDplQpt4paSoojapRqsyZSEAIfkEBQYAAgAsDADMARQCQAAAAv+Ej6nL7Q+jnLTai7PevPsPhuJIluaJpurKtu4Lx/JM1/aN5/rO9/4PDAqHxKLxiEwql8ym8wmNSqfUqvWKzSYF3K73Cw6Lx+Sy+YxOq9fstvsNj8vn9Lr9js/DJfq+/w8YKDhIWGh4iCjHl8jY6PgIGSk5SbkXUYmZqbnJ2ekJufgpOkpaanpKGYq6ytrq+grrpRpLW2t7ixs4m8vb6/sLvAs8TFxs7Cl8rLzM3AyY7BwtPU1NBl2Nna1dfL3t/Q3u2h1OXm6eOX6uvs5OmN4OHy8f9z5vf48PVp/P39++7y+gwG8ABxo8GK0gwoUMhylsCDFirYcSK1o8RfGixo3Om7R4/AgypMgJAUqaPHlypMqVLFvSQAnTpMuZNGva3BAz5s2dPHvyzAnTp9ChRD8CRVk0qdKlTI6mZAo1qtQdTmVOvYo164qqJbV6/QqWA9cAYcuaPctgLNq1bMGqbQs3LtS3cuva7Un3rt69LPPy/Qs4i9/AhAs/GWw4seIiiBc7fkyVK+TJlHM0row5M4rLmjt79sD5s+jRFEKTPo1agenUrE+vbg3b8+vYtCvPro3b8e3cvAvv7g2c7+/gxOsOL46c7fHkzMsubw5dawEAIfkEBY8BAgAsDADsAUMDIAAABf8gII5kaZ5oqq5s675wLM90bd94ru987//AoHBILBqPyIByyWwin9
CodEqtWq/YrHbL7XpJzTDzSy6bz+i0es1uu98AsRxOr9vv+Lx+z8fLxX2BgoOEhYaHiGp/YYmNjo+QkZKTfYtOlJiZmpucnZ4vlmM+HRYXRxcWGJ8tF6SpGKYlqBtvszYWFhGrJgW4u5MctL9ZGxYaPqFLABi4sSO4uTGkzkSoqsMnGtAYzNQizNEpGt5Z4Lri5M/hTwUaFWy9GdhX7uwZHe8pFcUcKvzBBVBoYMYBlgQbAzMUTAeglUIM9U78w/el2LEeyZQsw9WBBCpcAWFMO5VqnolmK2z/qSjZRWUKlid8RdHGkEw8k1RQXSyiDRpHb/u2oajAwSfKEuAUcjw4I6lSYSR6+rxGgqjRUl4sIsu4seiEEf9CvhhpxBrOEVa/3uhFFR7Mk+uQkFVz82wUnUb46WUmtiHHniiYUZzwsa82DCE/1lRxOPFREYoPWu1IQvC5wl20YuQqGOu3e3FbzK321mTdG2bdpI4Z2kg3t/LsQtFMBOQyqCYMpjWx2q8F3ArzAYjXT0ZwtNDUbqSWfETvj7iJGduabPlvEUQ3kDKctAO50REsV53Y94VZh/e8HfdYGr1BZrzIz8CbECBSn6akUsau/aG3Vk5N5dxVEgUoVAkF/IPB/waLZTMdCsd9dGAJ/V31lm3/pLMPQd7JUtpGiRkITYMlaLMTXONx+J9Rw7nCQSynQWZbEO6l4x5E54zgVH5F3SOCNgHVN1gM6w24H30cCieSiA96xOGJMKQWDAu7UeiZCEqF01tS5bGwZXNYxvYjNDsVwxyYMdS3EEJJ0vZjd+q1ZlZGAYCokIwOidUjNz1COZp4I+jXpwzWHJbUmeU9t01RgEW16GPmvQJRfdS00p8Ge0H1UUFJOscnOJ9C1Y525uT26UPcRCchn9dF2SovLI3aQalWshoqcqVw1KOqqHYq437qhGjrp12q4BKCMG1qK1PDYepKPLOKGSNmQFiFav97j84IGanGZDpmQhok1BoLRf5qJDfhukKiqd3xGecrShW7Ql1TzvshiGCVst2YzhR6pQs0GRmwX/tZ5RJf+eZpnK6DymAtq27ueaijUAKgFZ0gFvMOfAhDVmm5c5HSlm/9lsvKiPmKCYDJLsVTskwyvjhgBkqe/FZvM/+LnULOuHxCjEPda6QKr6FVM5UwhetYdMMJbS6Ej1Hb9JU+P11Za0WtGzRVSlvNM64VdzXyztJ+HQSg4b36o5LFMO2bzgD0hBs/Cyea7FIzKydNkybQnXKOLxSFT70qVEnCemYNbGawx4p2ZXAq4fWZKTAhXtLAUX487gpoc+iR5sIZHub/OxjzhQrlqnSMwuIJi+D31d6YGENkuOoZmkttlwizBFnrHrax2k7OG95WRgc02UgXhwLOJZg8g0zQyVjx8eyNrQ6wG5Wp9tSXfegVXFpDHbMwkudeVfCfqXx+bAYLwbzz658QvQmN1k7k7XeHVrQMbq5fMym/K1yPUnE0ZCnvcOH4nuLCoZVeuM0fDJyOA7/1tMolMBqYswHrXhAPJcUjgK7712hSU7po/OZ0+EqB5IRXv6EZcHbo693oPLQfk1WJeWyBofritj3fFNB5+8NV8oBnvXxhrykYbM4GhahCp0HKYq8ComfehzW4wUCBSvSMFL/TQ+SlDYQ5CKLwViIn//SN6YhmXAHL8ke/Ljrud8xjHsAGaJCtseaMePpMQFIzwSsm8Rp9HNh6YCYTbZDPioTiWwsOA76U8O1YrCthQDqzsbigxycnmobUcmahHTrygGNcGf5qWEVOWgiGY4vj7phopemt8mdOIxjniIfESXIASCmMXxOLGAH0GdKLrbRasBpZg32hKxodM2P/lmNHtAEhSxaiRoIMJL9X4lGXfrTbkT4kxzcKRJHcq8FDnshKBIaTh7EwZjj6OBZkrrNVsqONBc+ZQfO4iEx7YxoAZXFPXEDpezOMA2f+WBzVPSldfAPgU9oYjFk51KGozM3jRglMc46poQ+FqKtSyc17if8uhK
705At36SVQyWsFNMkOXpxHPRfeUXfC+CgUL0LFLsmQBik94XQG2ZplihE5C0LTM3+TUYc6oycLClcLXQrTkWaThkwV5pJ+t8xebq4FU9pkOYO1vwVe4JdSZYGZwCpLv4zjQlQJTlcRSbS/IBSMfQNnxNwKLt9ZTZJTu0jHzDegTHKkfdXTAfP2t0ZSAi6g3UzkDlVZRGWCs6XYrCYv+UlOLxkDhSXxaCwT68so9vRBNSUmDXSCWVXMM64V+ynZSsFIIcDPf80rYxHJWlEXFDZvUJ1BVWcKq6uSSxgAderVXpdCQ+4OhTAwrsqQO4v9sbMzbdnXkngF1+Kh1oj/eluhFzeI19uEJJmhkd3ffBjYHAx2ouXhqw1hllgdHpa2YQ0o7Ho7RJJGSqQCxJQ72RnZ4fGys1DZYljNeNMZ9EK/lWSngO/DkKo5sweqZQ/TrNlN+E4gjWRMLxubqltwhrW9v5Vvfy8aF8tBSrr3FeE7aSleHgbvtS8Jr4eJKNGdmKzFrQtud+8Tkhgm9EqtVUf4aMxgHelMcNhlzzrMhponFji+UDSeNSG7VQ+Bkly+hVB/vuVhKnNWxlDhKzBxplVm3mLLPGyxmM85X1iWja0XuMBJLTvZNFtZst+ccJYlGqdtHpmtKHWjVf43Y8Z8dwNB+e+V81reMKlMpsOJ//NhHR3ZZyFQLB9sdOD0V+igqc8qNua0KxHNtx1f7btNCsqPqYE22jlnyGVtMxSjSzxXE0fJ/4H1poRD3KjiqmeMcnN9Pd3g2MXyJeKxhhWpDFjhXnNqwE61P3H12TqTK9mviHa0wxbhaaXR1UgckgS+ituYyTbPznYcrfPHAV4fG3g0Wx0Bx3s/aGIY0oDaZE/yUTWGFmvfZDuRq0V2Hz/POTCpdkV1C54jz31mJ6rmNmj6YmojO4ZhEFn1+qghKHDo2ntUUwqrFIXxVXa8skTkkFI4zq17bGAcVuYULUcMGWeBSjthm4jqiMMqQKfA46sVzldb3gGcT1rn8RYxD/9xoyxfhRDjKKY2nx7Y1olasle0HLqLig4loPU6ZgXkYLuUcjV45WodNqero/Q8aXsxLAMusYbKUW5bjN/H7HCfT7R+oreai0up7xpgaq9yRHpL1ChH1I9QHS34+ZCJIDh2wcMg36SmpyLy5i5BxfWIlmgZRLvg2WQ7bg7rWBtZmvJpj4uO8crRo6r0iXP61CzEtESnh1npTl8ni3UjwN0I0TQgLq26snv5dSdHAEbQRGykIgngMj57Kr3wHesf3PfTJ4A7HjMq9nW9Rz+9nYLPagm/9txTKfXbVIWQwp7cm/Pm5uwPHNWZxP1dMaRGuL8PCPFv/JW3PQJCUnqdJziuPLJwcTVt2jVNX6Ndv6Z5XIEN1mRe7yYbFCgFEaYJIJYGhKMFGFaBuSQCGDMMkIYDGeiBJmgEEbgJJXgGkrYFK4gT/AWCD/gLFiZYE3iCOAgEDOgJL2gX9YSDSySD1aGCK+JzNdCDOZiESkhnS9iE/pV/AjWEmoBUe9dpTWZtTpiFWmh6W3iC+lVZIaiCFRIMAhgpWNiFaIiDSJiGnxAWKBCGbBiHcjiHdNiFdBICACH5BAVkAAAALAAAAAABAAEAAAICRAEAOw==", "text/plain": [ "" ] }, "execution_count": 9, "metadata": { "image/gif": { "width": 600 } }, "output_type": "execute_result" } ], "source": [ "import urllib\n", "\n", "from IPython.display import Image\n", "\n", "# Get an image\n", "request = urllib.request.urlopen(\"https://raw.githubusercontent.com/neuml/txtai/master/demo.gif\")\n", "\n", "# Upsert new record 
having both text and an object\n", "embeddings.upsert([(\"txtai\", {\"text\": \"txtai executes machine-learning workflows to transform data and build AI-powered semantic search applications.\", \"object\": request.read()}, None)])\n", "\n", "# Query txtai for the most similar result to \"machine learning\" and get associated object\n", "result = embeddings.search(\"select object from txtai where similar('machine learning') limit 1\")[0][\"object\"]\n", "\n", "# Display image\n", "Image(result.getvalue(), width=600)" ] }, { "cell_type": "markdown", "metadata": { "id": "boEY-GSUsi_L" }, "source": [ "# Topic modeling\n", "\n", "Topic modeling is enabled via semantic graphs. Semantic graphs, also known as knowledge graphs or semantic networks, build a graph network with semantic relationships connecting the nodes. In txtai, they can take advantage of the relationships inherently learned within an embeddings index." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "k7eRzturtCwr", "outputId": "794d10d6-8463-4e8c-c1d1-0af59e97e59f" }, "outputs": [ { "data": { "text/plain": [ "[{'topic': 'virus_confirmed_us_million',\n", " 'category': 'health',\n", " 'text': 'US tops 5 million confirmed virus cases'},\n", " {'topic': 'collapsed_has_forming_ice',\n", " 'category': 'climate',\n", " 'text': \"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\"},\n", " {'topic': 'along_escalate_mobilises_tensions',\n", " 'category': 'world politics',\n", " 'text': 'Beijing mobilises invasion craft along coast as Taiwan tensions escalate'}]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create embeddings with a graph index\n", "embeddings = Embeddings(\n", " path=\"sentence-transformers/nli-mpnet-base-v2\",\n", " content=True,\n", " functions=[\n", " {\"name\": \"graph\", \"function\": \"graph.attribute\"},\n", " ],\n", " 
expressions=[\n", " {\"name\": \"category\", \"expression\": \"graph(indexid, 'category')\"},\n", " {\"name\": \"topic\", \"expression\": \"graph(indexid, 'topic')\"},\n", " ],\n", " graph={\n", " \"topics\": {\n", " \"level\": \"first\",\n", " \"categories\": [\"health\", \"climate\", \"finance\", \"world politics\"]\n", " }\n", " }\n", ")\n", "\n", "embeddings.index(data)\n", "embeddings.search(\"select topic, category, text from txtai\")\n" ] }, { "cell_type": "markdown", "metadata": { "id": "0VTB-LjExpfv" }, "source": [ "When a graph index is enabled, topics are assigned to each of the entries in the embeddings instance. Topics are dynamically created using a sparse index over graph nodes grouped by [community detection algorithms](https://en.wikipedia.org/wiki/Community_structure).\n", "\n", "Topic categories are also derived as shown above." ] }, { "cell_type": "markdown", "metadata": { "id": "0aOJOxE3y4vD" }, "source": [ "# Subindexes\n", "\n", "Subindexes can be configured for an embeddings. A single embeddings instance can have multiple subindexes each with different configurations.\n", "\n", "We'll build an embeddings index having both a keyword and dense index to demonstrate." 
] }, { "cell_type": "code", "execution_count": 11, "metadata": { "id": "TOwwKw3w_eJG" }, "outputs": [], "source": [ "# Create embeddings with subindexes\n", "embeddings = Embeddings(\n", " content=True,\n", " defaults=False,\n", " indexes={\n", " \"keyword\": {\n", " \"keyword\": True\n", " },\n", " \"dense\": {\n", " \"path\": \"sentence-transformers/nli-mpnet-base-v2\"\n", " }\n", " }\n", ")\n", "embeddings.index(data)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "M0HKb9mzxkL-", "outputId": "16200bfc-715a-4dfe-89c4-cd6476c0425a" }, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embeddings.search(\"feel good story\", limit=1, index=\"keyword\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-SnA1s0kxw9x", "outputId": "9f6d7cc6-7325-4ac4-ded0-aa0502d088e0" }, "outputs": [ { "data": { "text/plain": [ "[{'id': '4',\n", " 'text': 'Maine man wins $1M from $25 lottery ticket',\n", " 'score': 0.08329025655984879}]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embeddings.search(\"feel good story\", limit=1, index=\"dense\")" ] }, { "cell_type": "markdown", "metadata": { "id": "7vFe31Gax-0r" }, "source": [ "Once again, this example demonstrates the difference between keyword and semantic search. The first search call uses the defined keyword index, the second uses the dense vector index." ] }, { "cell_type": "markdown", "metadata": { "id": "1M_OMEndzgnG" }, "source": [ "# LLM orchestration\n", "\n", "txtai is an all-in-one AI framework. 
txtai supports building autonomous agents, retrieval augmented generation (RAG), chat with your data, pipelines and workflows that interface with large language models (LLMs).\n", "\n", "The [RAG pipeline](https://neuml.github.io/txtai/pipeline/llm/rag/) is txtai's spin on retrieval augmented generation (RAG). This pipeline extracts knowledge from content by joining a prompt, context data store and generative model together.\n", "\n", "The following example shows how a large language model (LLM) can use an embeddings database for context." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vWX9Q6Iy0X3Z", "outputId": "82f9f9cc-b7fb-4ee9-cd35-9b00c022f83a" }, "outputs": [ { "data": { "text/plain": [ "{'answer': 'Canada is having issues with climate change.',\n", " 'reference': 'da633124-33ff-58d6-8ecb-14f7a44c042a'}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from txtai import RAG\n", "\n", "# Create embeddings\n", "embeddings = Embeddings(path=\"sentence-transformers/nli-mpnet-base-v2\", content=True, autoid=\"uuid5\")\n", "\n", "# Create an index for the list of text\n", "embeddings.index(data)\n", "\n", "# RAG Prompt Template\n", "template = \"\"\"\n", " Answer the following question using the provided context.\n", "\n", " Question:\n", " {question}\n", "\n", " Context:\n", " {context}\n", "\"\"\"\n", "\n", "# Create and run RAG instance\n", "rag = RAG(embeddings, \"Qwen/Qwen3-0.6B\", template=template, output=\"reference\")\n", "rag(\"What country is having issues with climate change?\")" ] }, { "cell_type": "markdown", "metadata": { "id": "lqsZreJQuSfO" }, "source": [ "The logic above first builds an embeddings index. It then loads a LLM and uses the embeddings index to drive a LLM prompt.\n", "\n", "The RAG pipeline can optionally return a reference to the id of the best matching record with the answer. 
That id can be used to resolve the full answer reference. Note that the embeddings above used an [uuid autosequence](https://neuml.github.io/txtai/embeddings/configuration/general/#autoid)." ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ioC-gY4wwWVQ", "outputId": "d6eab14a-83cd-434c-faa8-2afe285e842b" }, "outputs": [ { "data": { "text/plain": [ "[{'id': 'da633124-33ff-58d6-8ecb-14f7a44c042a',\n", " 'text': \"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\"}]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "uid = rag(\"What country is having issues with climate change?\")[\"reference\"]\n", "embeddings.search(f\"select id, text from txtai where id = '{uid}'\")" ] }, { "cell_type": "markdown", "metadata": { "id": "fwVMGqV2nHcP" }, "source": [ "LLM inference can also be run standalone." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 36 }, "id": "NAFMSJO-k8qW", "outputId": "c2b07b49-f50d-4f74-a2fe-17bc699a91f1" }, "outputs": [ { "data": { "text/plain": [ "'One of the most popular and iconic places to visit in Washington, DC is the National Mall.'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from txtai import LLM\n", "\n", "# Default LLM is granite-4.0-350m\n", "# Supports any LLM (Hugging Face, llama.cpp, Ollama, vLLM, OpenAI, Claude etc)\n", "# See https://neuml.github.io/txtai/pipeline/llm/llm\n", "llm = LLM()\n", "llm(\"Say the name of 1 place to visit in Washington, DC\")" ] }, { "cell_type": "markdown", "metadata": { "id": "ekRIFk4uuoLN" }, "source": [ "# Language model workflows\n", "\n", "Language model workflows, also known as semantic workflows, connect language models together to build intelligent applications.\n", "\n", "Workflows can run right alongside an 
embeddings instance, similar to a stored procedure in a relational database. Workflows can be written in either Python or YAML. We'll demonstrate how to write a workflow with YAML." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JFzSs_Wa012D", "outputId": "055e4f6d-a324-47ce-e3be-5aae06b28651" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Writing embeddings.yml\n" ] } ], "source": [ "%%writefile embeddings.yml\n", "\n", "# Embeddings instance\n", "writable: true\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", " content: true\n", " functions:\n", " - {name: translation, argcount: 2, function: translation}\n", "\n", "# Translation pipeline\n", "translation:\n", "\n", "# Workflow definitions\n", "workflow:\n", " search:\n", " tasks:\n", " - search\n", " - action: translation\n", " args:\n", " target: fr\n", " task: template\n", " template: \"{text}\"" ] }, { "cell_type": "markdown", "metadata": { "id": "2WU0fCZasVNf" }, "source": [ "The workflow above loads an embeddings index and defines a search workflow. The search workflow runs a search and then passes the results to a translation pipeline. The translation pipeline translates results to French." 
] }, { "cell_type": "code", "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ySOK3HDK1nOZ", "outputId": "551f425d-8c99-4705-8dc8-7e23458a3ed5" }, "outputs": [ { "data": { "text/plain": [ "['Maine homme gagne $1M à partir de $25 billet de loterie']" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from txtai import Application\n", "\n", "# Build index\n", "app = Application(\"embeddings.yml\")\n", "app.add(data)\n", "app.index()\n", "\n", "# Run workflow\n", "list(app.workflow(\"search\", [\"select text from txtai where similar('feel good story') limit 1\"]))" ] }, { "cell_type": "markdown", "metadata": { "id": "rhxBBaUO4znH" }, "source": [ "SQL functions, in some cases, can accomplish the same thing as a workflow. The function below runs the translation pipeline as a function." ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "hJAC430s4yIV", "outputId": "8e0c4d6a-42d4-45e8-e79a-7872639c5512" }, "outputs": [ { "data": { "text/plain": [ "[{'text': 'Maine homme gagne $1M à partir de $25 billet de loterie'}]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "app.search(\"select translation(text, 'fr') text from txtai where similar('feel good story') limit 1\")" ] }, { "cell_type": "markdown", "metadata": { "id": "u5nHeC8MpX7k" }, "source": [ "LLM chains with templates are also possible with workflows. Workflows are self-contained, they operate both with and without an associated embeddings instance. The following workflow uses a LLM to conditionally translate text to French and then detect the language of the text." 
] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "zfwpSmTLnVU8", "outputId": "154a34ff-2919-415b-a5b7-fb7c9f5b9cf1" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Writing workflow.yml\n" ] } ], "source": [ "%%writefile workflow.yml\n", "\n", "llm:\n", " path: Qwen/Qwen3-4B-Instruct-2507\n", "\n", "workflow:\n", " chain:\n", " tasks:\n", " - task: template\n", " template: Translate text '{statement}' to {language} if the text is English, otherwise keep the original text\n", " action: llm\n", " - task: template\n", " template: What language is the following text. Only print the answer? {text}\n", " action: llm" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "8mo2XSr9nXJH", "outputId": "b9775570-4dd1-486e-f0c8-62f94fdb85b1" }, "outputs": [ { "data": { "text/plain": [ "['French', 'German']" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "inputs = [\n", " {\"statement\": \"Hello, how are you\", \"language\": \"French\"},\n", " {\"statement\": \"Hallo, wie geht's dir\", \"language\": \"French\"}\n", "]\n", "\n", "app = Application(\"workflow.yml\")\n", "list(app.workflow(\"chain\", inputs))" ] }, { "cell_type": "markdown", "metadata": { "id": "aDIF3tYt6X0O" }, "source": [ "# Wrapping up\n", "\n", "AI is advancing at a rapid pace. Things not possible even a year ago are now possible. This notebook introduced txtai, an all-in-one AI framework. 
The possibilities are limitless and we’re excited to see what can be built on top of txtai!\n", "\n", "Visit the links below for more.\n", "\n", "[GitHub](https://github.com/neuml/txtai) | [Documentation](https://neuml.github.io/txtai) | [Examples](https://neuml.github.io/txtai/examples/)" ] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "T4", "provenance": [] }, "kernelspec": { "display_name": "local", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.19" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: examples/02_Build_an_Embeddings_index_with_Hugging_Face_Datasets.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "LjmhJ4ad9kBL" }, "source": [ "# Build an Embeddings index with Hugging Face Datasets\n", "\n", "This notebook shows how txtai can index and search with Hugging Face's [Datasets](https://github.com/huggingface/datasets) library. Datasets opens access to a large and growing list of publicly available datasets. Datasets has functionality to select, transform and filter data stored in each dataset.\n", "\n", "In this example, txtai will be used to index and query a dataset.\n", "\n", "**Make sure to select a GPU runtime when running this notebook**" ] }, { "cell_type": "markdown", "metadata": { "id": "8tLWvo9v-Q0u" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. Also install `datasets`." 
] }, { "cell_type": "code", "metadata": { "id": "Fa5BCjMFqVKE" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai\n", "!pip install datasets" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "hOdEv8MH-e5h" }, "source": [ "# Load dataset and build a txtai index\n", "\n", "In this example, we'll load the `ag_news` dataset, which is a collection of news article headlines. This only takes a single line of code!\n", "\n", "Next, txtai will index the first 10,000 rows of the dataset. A sentence similarity model is used to compute sentence embeddings. sentence-transformers has a number of [pre-trained models](https://huggingface.co/models?pipeline_tag=sentence-similarity) that can be swapped in.\n", "\n", "In addition to the embeddings index, we'll also create a Similarity instance to re-rank search hits for relevancy. " ] }, { "cell_type": "code", "metadata": { "id": "3hYRk9JnsM0J" }, "source": [ "%%capture\n", "from datasets import load_dataset\n", "\n", "from txtai.embeddings import Embeddings\n", "from txtai.pipeline import Similarity\n", "\n", "def stream(dataset, field, limit):\n", " index = 0\n", " for row in dataset:\n", " yield (index, row[field], None)\n", " index += 1\n", "\n", " if index >= limit:\n", " break\n", "\n", "def search(query):\n", " return [(result[\"score\"], result[\"text\"]) for result in embeddings.search(query, limit=50)]\n", "\n", "def ranksearch(query):\n", " results = [text for _, text in search(query)]\n", " return [(score, results[x]) for x, score in similarity(query, results)]\n", "\n", "# Load HF dataset\n", "dataset = load_dataset(\"ag_news\", split=\"train\")\n", "\n", "# Create embeddings model, backed by sentence-transformers & transformers, enable content storage\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/paraphrase-MiniLM-L3-v2\", \"content\": True})\n", "embeddings.index(stream(dataset, \"text\", 10000))\n", "\n", "# Create similarity 
instance for re-ranking\n", "similarity = Similarity(\"valhalla/distilbart-mnli-12-3\")" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "LBhHcX6eFmGI" }, "source": [ "# Search the dataset\n", "\n", "Now that an index is ready, let's search the data! The following section runs a series of queries and shows the results. Like basic search engines, txtai finds token matches. But the real power of txtai is finding semantically similar results.\n", "\n", "sentence-transformers has a great overview on [information retrieval](https://www.sbert.net/examples/applications/information-retrieval/README.html) that is well worth a read. " ] }, { "cell_type": "code", "metadata": { "id": "YVmbiY92vxEO", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "85f5e0ad-14ba-4642-aed6-13c14a710d68" }, "source": [ "from IPython.core.display import display, HTML\n", "\n", "def table(query, rows):\n", " html = \"\"\"\n", " \n", " \"\"\"\n", "\n", " html += \"

%s

\" % (query)\n", " for score, text in rows:\n", " html += \"\" % (score, text)\n", " html += \"
ScoreText
%.4f%s
\"\n", "\n", " display(HTML(html))\n", "\n", "for query in [\"Positive Apple reports\", \"Negative Apple reports\", \"Best planets to explore for life\", \"LA Dodgers good news\", \"LA Dodgers bad news\"]:\n", " table(query, ranksearch(query)[:2])\n" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "

Positive Apple reports

ScoreText
0.9941Apple's iPod a Huge Hit in Japan The iPod is proving a colossal hit on the Japanese electronics and entertainment giant's home ground. The tiny white machine is catching on as a fashion statement and turning into a cultural icon here, much the same way it won a fanatical following in the United States.
0.9886Apple tops US consumer satisfaction Recent data published by the American Customer Satisfaction Index (ACSI) shows Apple leading the consumer computer industry with the the highest customer satisfaction.
" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "

Negative Apple reports

ScoreText
0.9847Apple Recalls 28,000 Faulty Batteries Sold with 15-inch PowerBook Apple has had to recall up to 28,000 notebook batteries that were sold for use with their 15-inch PowerBook. Apple reports that faulty batteries sold between January 2004 and August 2004 can overheat and pose a fire hazard.
0.9795Apple Announces Voluntary Recall of Powerbook Batteries Apple, in cooperation with the US Consumer Product Safety Commission (CPSC), announced Thursday a voluntary recall of 15 quot; Aluminum PowerBook batteries. The batteries being recalled could potentially overheat, though no injuries relating ...
" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "

Best planets to explore for life

ScoreText
0.9110Tiny 'David' Telescope Finds 'Goliath' Planet A newfound planet detected by a small, 4-inch-diameter telescope demonstrates that we are at the cusp of a new age of planet discovery. Soon, new worlds may be located at an accelerating pace, bringing the detection of the first Earth-sized world one step closer.
0.8838Venus: Inhabited World? by Harry Bortman In part 1 of this interview with Astrobiology Magazine editor Henry Bortman, planetary scientist David Grinspoon explained how Venus evolved from a wet planet similar to Earth to the scorching hot, dried-out furnace of today. In part 2, Grinspoon discusses the possibility of life on Venus...
" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "

LA Dodgers good news

ScoreText
0.9961Dodgers 7, Braves 4 Los Angeles, Ca. -- Shawn Green belted a grand slam and a solo homer as Los Angeles beat Mike Hampton and the Atlanta Braves 7-to-4 Saturday afternoon.
0.9928MLB: Los Angeles 7, Atlanta 4 Shawn Green hit two home runs Saturday, including a grand slam, to lead the Los Angeles Dodgers to a 7-4 victory over the Atlanta Braves.
" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "

LA Dodgers bad news

ScoreText
0.9880Expos Keep Dodgers at Bay With 8-7 Win (AP) AP - Giovanni Carrara walked Juan Rivera with the bases loaded and two outs in the ninth inning Monday night, spoiling Los Angeles' six-run comeback and handing the Montreal Expos an 8-7 victory over the Dodgers.
0.9671Gagne blows his 2d save Pinch-hitter Lenny Harris delivered a three-run double off Eric Gagne with two outs in the ninth, rallying the Florida Marlins past the Dodgers, 6-4, last night in Los Angeles.
" ], "text/plain": [ "" ] }, "metadata": {} } ] } ] } ================================================ FILE: examples/03_Build_an_Embeddings_index_from_a_data_source.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "WDbhGHtG8jFE" }, "source": [ "# Build an Embeddings index from a data source\n", "\n", "In Part 1, we gave a general overview of txtai, the backing technology and examples of how to use it for similarity searches. Part 2 covered an embedding index with a larger dataset.\n", "\n", "For real world large-scale use cases, data is often stored in a database (Elasticsearch, SQL, MongoDB, files, etc). Here we'll show how to read from SQLite, build an Embedding index and run queries against the generated Embeddings index.\n", "\n", "This example covers functionality found in the [paperai](https://github.com/neuml/paperai) library. See that library for a full solution that can be used with the dataset discussed below." ] }, { "cell_type": "markdown", "metadata": { "id": "UQ0fCwXn9bcH" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "czPYSA2Q9ZHO" }, "outputs": [], "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai" ] }, { "cell_type": "markdown", "metadata": { "id": "SN9SCZKQ9fJF" }, "source": [ "# Download data\n", "\n", "This example is going to work off a subset of the [CORD-19](https://www.semanticscholar.org/cord19) dataset. COVID-19 Open Research Dataset (CORD-19) is a free resource of scholarly articles, aggregated by a coalition of leading research groups, covering COVID-19 and the coronavirus family of viruses.\n", "\n", "The following download is a SQLite database generated from a [Kaggle notebook](https://www.kaggle.com/davidmezzetti/cord-19-slim/output). 
More information on this data format can be found in the [CORD-19 Analysis](https://www.kaggle.com/davidmezzetti/cord-19-analysis-with-sentence-embeddings) notebook." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "TONQ4_Kv9dtd" }, "outputs": [], "source": [ "%%capture\n", "!wget https://github.com/neuml/txtai/releases/download/v1.1.0/tests.gz\n", "!gunzip tests.gz\n", "!mv tests articles.sqlite" ] }, { "cell_type": "markdown", "metadata": { "id": "_UxcC1-JGH-d" }, "source": [ "# Build an embeddings index\n", "\n", "The following steps build an embeddings index using a vector model designed for medical papers, [PubMedBERT Embeddings](https://huggingface.co/NeuML/pubmedbert-base-embeddings)." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5PrrxGRPGHqX", "outputId": "61bf7211-6757-4147-8f2f-e4d1ebe58e11" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Iterated over 21499 total rows\n" ] } ], "source": [ "import sqlite3\n", "\n", "import regex as re\n", "\n", "from txtai import Embeddings\n", "\n", "def stream():\n", " # Connection to database file\n", " db = sqlite3.connect(\"articles.sqlite\")\n", " cur = db.cursor()\n", "\n", " # Select tagged sentences without a NLP label. 
NLP labels are set for non-informative sentences.\n", " cur.execute(\"SELECT Id, Name, Text FROM sections WHERE (labels is null or labels NOT IN ('FRAGMENT', 'QUESTION')) AND tags is not null\")\n", "\n", " count = 0\n", " for row in cur:\n", " # Unpack row\n", " uid, name, text = row\n", "\n", " # Only process certain document sections\n", " if not name or not re.search(r\"background|(?\n", " \n", " \n", " Title\n", " Published\n", " Reference\n", " Match\n", " \n", " \n", " \n", " \n", " Management of osteoarthritis during COVID‐19 pandemic\n", " 2020-05-21 00:00:00\n", " https://doi.org/10.1002/cpt.1910\n", " Indeed, risk factors are sex, obesity, genetic factors and mechanical factors (3) .\n", " \n", " \n", " Does apolipoprotein E genotype predict COVID-19 severity?\n", " 2020-04-27 00:00:00\n", " https://doi.org/10.1093/qjmed/hcaa142\n", " Risk factors associated with subsequent death include older age, hypertension, diabetes, ischemic heart disease, obesity and chronic lung disease; however, sometimes there are no obvious risk factors .\n", " \n", " \n", " Prevalence and Impact of Myocardial Injury in Patients Hospitalized with COVID-19 Infection\n", " 2020-04-24 00:00:00\n", " http://medrxiv.org/cgi/content/short/2020.04.20.20072702v1?rss=1\n", " This risk was consistent across patients stratified by history of CVD, risk factors but no CVD, and neither CVD nor risk factors.\n", " \n", " \n", " COVID-19 and associations with frailty and multimorbidity: a prospective analysis of UK Biobank participants\n", " 2020-07-23 00:00:00\n", " https://www.ncbi.nlm.nih.gov/pubmed/32705587/\n", " BACKGROUND: Frailty and multimorbidity have been suggested as risk factors for severe COVID-19 disease.\n", " \n", " \n", " Risk Stratification for Healthcare workers during the CoViD-19 Pandemic; using demographics, co-morbid disease and clinical domain in order to assign clinical duties\n", " 2020-05-09 00:00:00\n", " 
http://medrxiv.org/cgi/content/short/2020.05.05.20091967v1?rss=1\n", " Vascular disease, diabetes and chronic pulmonary disease further increased risk.\n", " \n", " \n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "\n", "from IPython.display import display, HTML\n", "\n", "pd.set_option(\"display.max_colwidth\", None)\n", "\n", "db = sqlite3.connect(\"articles.sqlite\")\n", "cur = db.cursor()\n", "\n", "results = []\n", "for uid, score in embeddings.search(\"risk factors\", 5):\n", " cur.execute(\"SELECT article, text FROM sections WHERE id = ?\", [uid])\n", " uid, text = cur.fetchone()\n", "\n", " cur.execute(\"SELECT Title, Published, Reference from articles where id = ?\", [uid])\n", " results.append(cur.fetchone() + (text,))\n", "\n", "# Free database resources\n", "db.close()\n", "\n", "df = pd.DataFrame(results, columns=[\"Title\", \"Published\", \"Reference\", \"Match\"])\n", "\n", "# It has been reported that displaying HTML within VSCode doesn't work.\n", "# When using VSCode, the data can be exported to an external HTML file to view.\n", "# See example below.\n", "\n", "# htmlData = df.to_html(index=False)\n", "# with open(\"data.html\", \"w\") as file:\n", "# file.write(htmlData)\n", "\n", "display(HTML(df.to_html(index=False)))" ] }, { "cell_type": "markdown", "metadata": { "id": "XSf68I-ZfXOG" }, "source": [ "# Extracting additional columns from query results\n", "\n", "The example above uses the Embeddings index to find the top 5 best matches. In addition to this, an Extractor instance (this will be explained further in part 5) is used to ask additional questions over the search results, creating a richer query response." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "TLVOTQJchvTi" }, "outputs": [], "source": [ "%%capture\n", "from txtai.pipeline import Extractor\n", "\n", "# Create extractor instance using qa model designed for the CORD-19 dataset\n", "# Note: That extractive QA was a predecessor to Large Language Models (LLMs). LLMs likely will get better results.\n", "extractor = Extractor(embeddings, \"NeuML/bert-small-cord19qa\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 293 }, "id": "19fmKawThs6d", "outputId": "b7cd40e3-a87c-419d-f520-b7795607cebc" }, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TitlePublishedReferenceMatchRisk FactorsLocations
Management of osteoarthritis during COVID‐19 pandemic2020-05-21 00:00:00https://doi.org/10.1002/cpt.1910Indeed, risk factors are sex, obesity, genetic factors and mechanical factors (3) .sex, obesity, genetic factors and mechanical factorshospitals and clinics
Does apolipoprotein E genotype predict COVID-19 severity?2020-04-27 00:00:00https://doi.org/10.1093/qjmed/hcaa142Risk factors associated with subsequent death include older age, hypertension, diabetes, ischemic heart disease, obesity and chronic lung disease; however, sometimes there are no obvious risk factors .NoneNone
Prevalence and Impact of Myocardial Injury in Patients Hospitalized with COVID-19 Infection2020-04-24 00:00:00http://medrxiv.org/cgi/content/short/2020.04.20.20072702v1?rss=1This risk was consistent across patients stratified by history of CVD, risk factors but no CVD, and neither CVD nor risk factors.neither CVD nor risk factorsMount Sinai Health System
COVID-19 and associations with frailty and multimorbidity: a prospective analysis of UK Biobank participants2020-07-23 00:00:00https://www.ncbi.nlm.nih.gov/pubmed/32705587/BACKGROUND: Frailty and multimorbidity have been suggested as risk factors for severe COVID-19 disease.Frailty and multimorbidity213 countries and territories
Risk Stratification for Healthcare workers during the CoViD-19 Pandemic; using demographics, co-morbid disease and clinical domain in order to assign clinical duties2020-05-09 00:00:00http://medrxiv.org/cgi/content/short/2020.05.05.20091967v1?rss=1Vascular disease, diabetes and chronic pulmonary disease further increased risk.Vascular disease, diabetes and chronic pulmonary diseaseNone
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "db = sqlite3.connect(\"articles.sqlite\")\n", "cur = db.cursor()\n", "\n", "results = []\n", "for uid, score in embeddings.search(\"risk factors\", 5):\n", " cur.execute(\"SELECT article, text FROM sections WHERE id = ?\", [uid])\n", " uid, text = cur.fetchone()\n", "\n", " # Get list of document text sections to use for the context\n", " cur.execute(\"SELECT Name, Text FROM sections WHERE (labels is null or labels NOT IN ('FRAGMENT', 'QUESTION')) AND article = ? ORDER BY Id\", [uid])\n", " texts = []\n", " for name, txt in cur.fetchall():\n", " if not name or not re.search(r\"background|(?\n", " @import url('https://fonts.googleapis.com/css?family=Oswald&display=swap');\n", " table {\n", " border-collapse: collapse;\n", " width: 900px;\n", " }\n", " th, td {\n", " border: 1px solid #9e9e9e;\n", " padding: 10px;\n", " font: 15px Oswald;\n", " }\n", " \n", " \"\"\"\n", "\n", " html += \"

[%s] %s

\" % (category, query)\n", " for score, text in rows:\n", " html += \"\" % (score, text)\n", " html += \"
ScoreText
%.4f%s
\"\n", "\n", " display(HTML(html))\n", "\n", "def search(query, limit):\n", " query = {\n", " \"size\": limit,\n", " \"query\": {\n", " \"query_string\": {\"query\": query}\n", " }\n", " }\n", "\n", " results = []\n", " for result in es.search(index=\"articles\", body=query)[\"hits\"][\"hits\"]:\n", " source = result[\"_source\"]\n", " results.append((min(result[\"_score\"], 18) / 18, source[\"title\"]))\n", "\n", " return results\n", "\n", "limit = 3\n", "query= \"+yankees lose\"\n", "table(\"Elasticsearch\", query, search(query, limit))" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "

[Elasticsearch] +yankees lose

ScoreText
0.5808El Duque adds to gloomy NY forecast The Yankees #39; staff infection has spread to the one man the team can #39;t afford to lose. Orlando Hernandez was scratched from last night #39;s scheduled start because
0.5688Rangers Derail Red Sox The Red Sox lose for the first time in 11 games, falling to the Rangers 8-6 Saturday and missing a chance to pull within 1 1/2 games of the Yankees in the AL East.
0.5061Rout leaves Yanks #39; lead at 3 Royals gain control with 10-run 5th Against a nothing-to-lose team such as the Kansas City Royals, the Yankees #39; manager wanted his team to put down the hammer early and not let baseball #39;s second worst team believe it had a chance.
" ], "text/plain": [ "" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "metadata": { "id": "W1DkpcIob5kt" }, "source": [ "The table above shows the results for the query `+yankees lose`. This query requires the token `yankees`. The search doesn't understand the semantic meaning of the query. It returns the most relevant results with those two tokens.\n", "\n", "We can see in this case, the results aren't capturing the meaning of the search. Let's try adding semantic similarity to the search!" ] }, { "cell_type": "markdown", "metadata": { "id": "hMro47KedzJq" }, "source": [ "# Ranking search results with txtai\n", "\n", "txtai has a similarity module that computes the similarity between a query and a list of strings. Of course, txtai can also build a full index as shown in the previous notebooks but in this case we'll just use the ad-hoc similarity function.\n", "\n", "The code below creates a Similarity instance and defines a ranking function to order search results based on the computed similarity.\n", "\n", "`ranksearch` queries Elasticsearch for a larger set of results, ranks the results using the similarity instance and returns the top n results. " ] }, { "cell_type": "code", "metadata": { "id": "RUOj5zhFFK8N" }, "source": [ "%%capture\n", "from txtai.pipeline import Similarity\n", "\n", "def ranksearch(query, limit):\n", " results = [text for _, text in search(query, limit * 10)]\n", " return [(score, results[x]) for x, score in similarity(query, results)][:limit]\n", "\n", "# Create similarity instance for re-ranking\n", "similarity = Similarity(\"valhalla/distilbart-mnli-12-3\")" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "UMFuv5-Hedfc" }, "source": [ "Now let's re-run the previous search." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 334 }, "id": "3jJI9OxU0dZk", "outputId": "3233d128-2bf4-4d53-f851-f9dd8f51629e" }, "source": [ "# Run the search\n", "table(\"Elasticsearch + txtai\", query, ranksearch(query, limit))" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "

[Elasticsearch + txtai] +yankees lose

ScoreText
0.9929Ouch! Yankees hit new low INDIANS 22, YANKEES 0---At New York, Omar Vizquel went 6-for-7 to tie the American League record for hits as Cleveland handed the Yankees the largest loss in their history last night.
0.9874Vazquez and Yankees Buckle Early Because Javier Vazquez fizzled while Brad Radke flourished, the Yankees sustained their first regular-season defeat by the Minnesota Twins since 2001.
0.9542Slide of the Yankees: Pinstripes Punished George Steinbrenner watched from his box as his Yankees suffered the most one-sided loss in the franchise's long history.
" ], "text/plain": [ "" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "metadata": { "id": "RXB2PaDZfd8o" }, "source": [ "The results above do a much better job of finding results semantically similar in meaning to the query. Instead of just finding matches with `yankees` and `lose`, it finds matches where the `yankees lose`. \n", "\n", "This combination is effective and powerful. It takes advantage of the high performance of Elasticsearch while adding a semantic search capability. We may already have a large Elasticsearch cluster with TBs (or PBs)+ of data and years of engineering investment that solves most use cases. Semantically ranking search results is a practical approach." ] }, { "cell_type": "markdown", "metadata": { "id": "XBVL56fsRI86" }, "source": [ "# More examples\n", "\n", "Now for some more examples comparing the results from Elasticsearch vs Elasticsearch + txtai." ] }, { "cell_type": "code", "metadata": { "id": "7IHS38SERRpQ", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "25325d94-ddc9-44f2-f602-d792ffeb0a8c" }, "source": [ "for query in [\"good news +economy\", \"bad news +economy\"]:\n", " table(\"Elasticsearch\", query, search(query, limit))\n", " table(\"Elasticsearch + txtai\", query, ranksearch(query, limit))" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "

[Elasticsearch] good news +economy

ScoreText
0.8756Surprise drop US wholesale prices is mixed news for economy (AFP) AFP - A surprise drop in US wholesale prices in August showed inflation apparently in check, but analysts said this was good and bad news for the US economy.
0.7379China investment slows Good news for officials who are trying to cool an overheated economy; austerity measures to remain. BEIJING (Reuters) - China reported a marked slowdown in investment and money supply growth Monday, but stubbornly
0.7145Spending Rebounds, Good News for Growth WASHINGTON (Reuters) - U.S. consumer spending rebounded sharply July, government data showed on Monday, erasing the disappointment of June and bolstering hopes that the U.S. economy has recovered from its recent soft spot.
" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "

[Elasticsearch + txtai] good news +economy

ScoreText
0.9996Spending Rebounds, Good News for Growth WASHINGTON (Reuters) - U.S. consumer spending rebounded sharply in July, the government said on Monday, erasing the disappointment of June and bolstering hopes that the U.S. economy has recovered from its recent soft spot.
0.9996Spending Rebounds, Good News for Growth WASHINGTON (Reuters) - U.S. consumer spending rebounded sharply July, government data showed on Monday, erasing the disappointment of June and bolstering hopes that the U.S. economy has recovered from its recent soft spot.
0.9993Home building surges Housing construction in August jumped to its highest level in five months, a dose of encouraging news for the economy #39;s expansion.
" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "

[Elasticsearch] bad news +economy

ScoreText
0.9228Surprise drop US wholesale prices is mixed news for economy (AFP) AFP - A surprise drop in US wholesale prices in August showed inflation apparently in check, but analysts said this was good and bad news for the US economy.
0.6405Field Poll: Californians liking economy Bee Staff Writer. Californians are slowly growing more optimistic about the health of the economy, but a majority still feels the state is in bad economic times, according to a new Field Poll.
0.6188ADB says China should raise rates to cool economy China should raise interest rates to cool the economy and prevent a future buildup of bad loans in the banking system, the Asian Development Bank #39;s (ADB) Bei-jing representative Bruce Murray said.
" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "

[Elasticsearch + txtai] bad news +economy

ScoreText
0.9977Aging society hits Japan #39;s economy Japan #39;s economy will be the most severely affected among industrialized nations by population aging, Kyodo News said Thursday.
0.9963Funds: Fund Mergers Can Hurt Investors (Reuters) Reuters - Mergers and acquisitions have\\played an enormous role in the U.S. economy during the past\\several decades, but sometimes the results have been bad for\\consumers. Similarly, consolidation in the mutual fund\\business has sometimes hurt fund investors.
0.9958Signs of listless economy persist In a sign of persistent weakness in the US economy, a widely watched measure of business activity declined in August for the third consecutive month.
" ], "text/plain": [ "" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "metadata": { "id": "h-Bk3KLjZMpF" }, "source": [ "Once again while Elasticsearch usually returns quality results, occasionally it will match results that aren't semantically relevant. The power of semantic search is that not only will it find direct matches but matches with the same meaning. " ] } ] } ================================================ FILE: examples/05_Extractive_QA_with_txtai.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "vwELCooy4ljr" }, "source": [ "# Extractive QA with txtai\n", "\n", "In Parts 1 through 4, we gave a general overview of txtai, the backing technology and examples of how to use it for similarity searches. This notebook builds on that and extends to building extractive question-answering systems." ] }, { "cell_type": "markdown", "metadata": { "id": "ew7orE2O441o" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ] }, { "cell_type": "code", "metadata": { "id": "LPQTb25tASIG" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "_YnqorRKAbLu" }, "source": [ "# Create an Embeddings and Extractor instances\n", "\n", "The Embeddings instance is the main entrypoint for txtai. An Embeddings instance defines the method used to tokenize and convert a segment of text into an embeddings vector.\n", "\n", "The Extractor instance is the entrypoint for extractive question-answering.\n", "\n", "Both the Embeddings and Extractor instances take a path to a transformer model. Any model on the [Hugging Face model hub](https://huggingface.co/models) can be used in place of the models below." 
] }, { "cell_type": "code", "metadata": { "id": "OUc9gqTyAYnm" }, "source": [ "%%capture\n", "\n", "from txtai.embeddings import Embeddings\n", "from txtai.pipeline import Extractor\n", "\n", "# Create embeddings model, backed by sentence-transformers & transformers\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/nli-mpnet-base-v2\"})\n", "\n", "# Create extractor instance\n", "extractor = Extractor(embeddings, \"distilbert-base-cased-distilled-squad\")" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "4X5z3UjnAGe7", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "546d4fdd-9468-4130-ee93-fafafd966e8b" }, "source": [ "data = [\"Giants hit 3 HRs to down Dodgers\",\n", " \"Giants 5 Dodgers 4 final\",\n", " \"Dodgers drop Game 2 against the Giants, 5-4\",\n", " \"Blue Jays beat Red Sox final score 2-1\",\n", " \"Red Sox lost to the Blue Jays, 2-1\",\n", " \"Blue Jays at Red Sox is over. Score: 2-1\",\n", " \"Phillies win over the Braves, 5-0\",\n", " \"Phillies 5 Braves 0 final\",\n", " \"Final: Braves lose to the Phillies in the series opener, 5-0\",\n", " \"Lightning goaltender pulled, lose to Flyers 4-1\",\n", " \"Flyers 4 Lightning 1 final\",\n", " \"Flyers win 4-1\"]\n", "\n", "questions = [\"What team won the game?\", \"What was score?\"]\n", "\n", "execute = lambda query: extractor([(question, query, question, False) for question in questions], data)\n", "\n", "for query in [\"Red Sox - Blue Jays\", \"Phillies - Braves\", \"Dodgers - Giants\", \"Flyers - Lightning\"]:\n", " print(\"----\", query, \"----\")\n", " for answer in execute(query):\n", " print(answer)\n", " print()\n", "\n", "# Ad-hoc questions\n", "question = \"What hockey team won?\"\n", "\n", "print(\"----\", question, \"----\")\n", "print(extractor([(question, question, question, False)], data))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "---- Red Sox - Blue Jays ----\n", 
"('What team won the game?', 'Blue Jays')\n", "('What was score?', '2-1')\n", "\n", "---- Phillies - Braves ----\n", "('What team won the game?', 'Phillies')\n", "('What was score?', '5-0')\n", "\n", "---- Dodgers - Giants ----\n", "('What team won the game?', 'Giants')\n", "('What was score?', '5-4')\n", "\n", "---- Flyers - Lightning ----\n", "('What team won the game?', 'Flyers')\n", "('What was score?', '4-1')\n", "\n", "---- What hockey team won? ----\n", "[('What hockey team won?', 'Flyers')]\n" ] } ] } ] } ================================================ FILE: examples/06_Extractive_QA_with_Elasticsearch.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "zzZbP0LM6m5z" }, "source": [ "# Extractive QA with Elasticsearch\n", "\n", "txtai is datastore agnostic, the library analyzes sets of text. The following example shows how extractive question-answering can be added on top of an Elasticsearch system." ] }, { "cell_type": "markdown", "metadata": { "id": "xk7t5Jcd6reO" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and `Elasticsearch`." ] }, { "cell_type": "code", "metadata": { "id": "0y1UA4-q-YdA" }, "source": [ "%%capture\n", "\n", "# Install txtai and elasticsearch python client\n", "!pip install git+https://github.com/neuml/txtai elasticsearch\n", "\n", "# Download and extract elasticsearch\n", "!wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.10.1-linux-x86_64.tar.gz\n", "!tar -xzf elasticsearch-7.10.1-linux-x86_64.tar.gz\n", "!chown -R daemon:daemon elasticsearch-7.10.1" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "nKWz-C5gCJy8" }, "source": [ "Start an instance of Elasticsearch directly within this notebook. 
" ] }, { "cell_type": "code", "metadata": { "id": "3ZfJeWbM6wmj" }, "source": [ "import os\n", "from subprocess import Popen, PIPE, STDOUT\n", "\n", "# If issues are encountered with this section, ES can be manually started as follows:\n", "# ./elasticsearch-7.10.1/bin/elasticsearch\n", "\n", "# Start and wait for server\n", "server = Popen(['elasticsearch-7.10.1/bin/elasticsearch'], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1))\n", "!sleep 30" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "TWEn4w68-D1y" }, "source": [ "# Download data\n", "\n", "This example is going to work off a subset of the [CORD-19](https://www.semanticscholar.org/cord19) dataset. COVID-19 Open Research Dataset (CORD-19) is a free resource of scholarly articles, aggregated by a coalition of leading research groups, covering COVID-19 and the coronavirus family of viruses.\n", "\n", "The following download is a SQLite database generated from a [Kaggle notebook](https://www.kaggle.com/davidmezzetti/cord-19-slim/output). More information on this data format, can be found in the [CORD-19 Analysis](https://www.kaggle.com/davidmezzetti/cord-19-analysis-with-sentence-embeddings) notebook." ] }, { "cell_type": "code", "metadata": { "id": "8tVrIqSq-KBa" }, "source": [ "%%capture\n", "!wget https://github.com/neuml/txtai/releases/download/v1.1.0/tests.gz\n", "!gunzip tests.gz\n", "!mv tests articles.sqlite" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "hSWFzkCn61tM" }, "source": [ "# Load data into Elasticsearch\n", "\n", "The following block copies rows from SQLite to Elasticsearch." 
] }, { "cell_type": "code", "metadata": { "id": "So-OBvUT61QD", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "9647b8f8-8471-41bf-ccfa-a75306665638" }, "source": [ "import sqlite3\n", "\n", "import regex as re\n", "\n", "from elasticsearch import Elasticsearch, helpers\n", "\n", "# Connect to ES instance\n", "es = Elasticsearch(hosts=[\"http://localhost:9200\"], timeout=60, retry_on_timeout=True)\n", "\n", "# Connection to database file\n", "db = sqlite3.connect(\"articles.sqlite\")\n", "cur = db.cursor()\n", "\n", "# Elasticsearch bulk buffer\n", "buffer = []\n", "rows = 0\n", "\n", "# Select tagged sentences without a NLP label. NLP labels are set for non-informative sentences.\n", "cur.execute(\"SELECT s.Id, Article, Title, Published, Reference, Name, Text FROM sections s JOIN articles a on s.article=a.id WHERE (s.labels is null or s.labels NOT IN ('FRAGMENT', 'QUESTION')) AND s.tags is not null\")\n", "for row in cur:\n", " # Build dict of name-value pairs for fields\n", " article = dict(zip((\"id\", \"article\", \"title\", \"published\", \"reference\", \"name\", \"text\"), row))\n", " name = article[\"name\"]\n", "\n", " # Only process certain document sections\n", " if not name or not re.search(r\"background|(?\n", " \n", " \n", " Title\n", " Published\n", " Reference\n", " Match\n", " \n", " \n", " \n", " \n", " Prevalence and Impact of Myocardial Injury in Patients Hospitalized with COVID-19 Infection\n", " 2020-04-24 00:00:00\n", " http://medrxiv.org/cgi/content/short/2020.04.20.20072702v1?rss=1\n", " This risk was consistent across patients stratified by history of CVD, risk factors but no CVD, and neither CVD nor risk factors.\n", " \n", " \n", " Does apolipoprotein E genotype predict COVID-19 severity?\n", " 2020-04-27 00:00:00\n", " https://doi.org/10.1093/qjmed/hcaa142\n", " Risk factors associated with subsequent death include older age, hypertension, diabetes, ischemic heart disease, obesity and chronic lung disease; however, 
sometimes there are no obvious risk factors .\n", " \n", " \n", " COVID-19 and associations with frailty and multimorbidity: a prospective analysis of UK Biobank participants\n", " 2020-07-23 00:00:00\n", " https://www.ncbi.nlm.nih.gov/pubmed/32705587/\n", " BACKGROUND: Frailty and multimorbidity have been suggested as risk factors for severe COVID-19 disease.\n", " \n", " \n", " COVID-19: what has been learned and to be learned about the novel coronavirus disease\n", " 2020-03-15 00:00:00\n", " https://doi.org/10.7150/ijbs.45134\n", " • Three major risk factors for COVID-19 were sex (male), age (≥60), and severe pneumonia.\n", " \n", " \n", " Associations with covid-19 hospitalisation amongst 406,793 adults: the UK Biobank prospective cohort study\n", " 2020-05-11 00:00:00\n", " http://medrxiv.org/cgi/content/short/2020.05.06.20092957v1?rss=1\n", " In addition, many risk factors for covid-19 documented in the literature are highly correlated and it is not clear which may be independently related to risk.\n", " \n", " \n", "" ], "text/plain": [ "" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "metadata": { "id": "ylxOKji1-9_K" }, "source": [ "# Derive columns with Extractive QA\n", "\n", "The next section uses Extractive QA to derive additional columns. For each article, the full text is retrieved and a series of questions are asked of the document. The answers are added as a derived column per article." 
] }, { "cell_type": "code", "metadata": { "id": "mwBTrCkcOM_H" }, "source": [ "%%capture\n", "from txtai.embeddings import Embeddings\n", "from txtai.pipeline import Extractor\n", "\n", "# Create embeddings model, backed by sentence-transformers & transformers\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/nli-mpnet-base-v2\"})\n", "\n", "# Create extractor instance using qa model designed for the CORD-19 dataset\n", "extractor = Extractor(embeddings, \"NeuML/bert-small-cord19qa\")" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Yv75Lh-cOpL9", "colab": { "base_uri": "https://localhost:8080/", "height": 400 }, "outputId": "adee88e1-02bf-4a20-febb-6d2c170a63f9" }, "source": [ "document = {\n", " \"_source\": [\"id\", \"name\", \"text\"],\n", " \"size\": 1000,\n", " \"query\": {\n", " \"term\": {\"article\": None}\n", " },\n", " \"sort\" : [\"id\"]\n", "}\n", "\n", "def sections(article):\n", " rows = []\n", "\n", " search = document.copy()\n", " search[\"query\"][\"term\"][\"article\"] = article\n", "\n", " for result in es.search(index=\"articles\", body=search)[\"hits\"][\"hits\"]:\n", " source = result[\"_source\"]\n", " name, text = source[\"name\"], source[\"text\"]\n", "\n", " if not name or not re.search(r\"background|(?\n", " \n", " \n", " Title\n", " Published\n", " Reference\n", " Match\n", " Risk Factors\n", " Locations\n", " \n", " \n", " \n", " \n", " Management of osteoarthritis during COVID‐19 pandemic\n", " 2020-05-21 00:00:00\n", " https://doi.org/10.1002/cpt.1910\n", " Indeed, risk factors are sex, obesity, genetic factors and mechanical factors (3) .\n", " sex, obesity, genetic factors and mechanical factors\n", " None\n", " \n", " \n", " Prevalence and Impact of Myocardial Injury in Patients Hospitalized with COVID-19 Infection\n", " 2020-04-24 00:00:00\n", " http://medrxiv.org/cgi/content/short/2020.04.20.20072702v1?rss=1\n", " This risk was consistent across patients stratified by 
history of CVD, risk factors but no CVD, and neither CVD nor risk factors.\n", " None\n", " Abbott, Abbott Park, Illinois\n", " \n", " \n", " Does apolipoprotein E genotype predict COVID-19 severity?\n", " 2020-04-27 00:00:00\n", " https://doi.org/10.1093/qjmed/hcaa142\n", " Risk factors associated with subsequent death include older age, hypertension, diabetes, ischemic heart disease, obesity and chronic lung disease; however, sometimes there are no obvious risk factors .\n", " None\n", " None\n", " \n", " \n", " COVID-19 and associations with frailty and multimorbidity: a prospective analysis of UK Biobank participants\n", " 2020-07-23 00:00:00\n", " https://www.ncbi.nlm.nih.gov/pubmed/32705587/\n", " BACKGROUND: Frailty and multimorbidity have been suggested as risk factors for severe COVID-19 disease.\n", " Frailty and multimorbidity\n", " comorbidity groupings and the corresponding health conditions\n", " \n", " \n", " COVID-19: what has been learned and to be learned about the novel coronavirus disease\n", " 2020-03-15 00:00:00\n", " https://doi.org/10.7150/ijbs.45134\n", " • Three major risk factors for COVID-19 were sex (male), age (≥60), and severe pneumonia.\n", " Mandatory contact tracing and quarantine\n", " cities, provinces, and countries\n", " \n", " \n", "" ], "text/plain": [ "" ] }, "metadata": {} } ] } ] } ================================================ FILE: examples/07_Apply_labels_with_zero_shot_classification.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Apply labels with zero-shot classification\n", "\n", "This notebook shows how zero-shot classification can be used to perform text classification, labeling and topic modeling. 
txtai provides a light-weight wrapper around the zero-shot-classification pipeline in Hugging Face Transformers. This method works impressively well out of the box. Kudos to the Hugging Face team for the phenomenal work on zero-shot classification!\n", "\n", "The examples in this notebook pick the best matching label using a list of labels for a snippet of text.\n", "\n", "[tldrstory](https://github.com/neuml/tldrstory) has a full-stack implementation of a zero-shot classification system using Streamlit, FastAPI and Hugging Face Transformers. There is also a [Medium article describing tldrstory](https://towardsdatascience.com/tldrstory-ai-powered-understanding-of-headlines-and-story-text-fc86abd702fc) and zero-shot classification. \n" ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "PNPJ95cdTKSS" }, "source": [ "# Create a Labels instance\n", "\n", "The Labels instance is the main entrypoint for zero-shot classification. 
This is a light-weight wrapper around the zero-shot-classification pipeline in Hugging Face Transformers.\n", "\n", "In addition to the default model, additional models can be found on the [Hugging Face model hub](https://huggingface.co/models?search=mnli).\n" ] }, { "cell_type": "code", "metadata": { "id": "nTDwXOUeTH2-" }, "source": [ "%%capture\n", "\n", "from txtai.pipeline import Labels\n", "\n", "# Create labels model\n", "labels = Labels()\n", "\n", "# Alternate models can be used via passing the model path as shown below\n", "# labels = Labels(\"roberta-large-mnli\")" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "-vGR_piwZZO6" }, "source": [ "# Applying labels to text\n", "\n", "The example below shows how a zero-shot classifier can be applied to arbitrary text. The default model for the zero-shot classification pipeline is *bart-large-mnli*. \n", "\n", "Look at the results below. It's nothing short of amazing✨ how well it performs. These aren't all simple even for a human. For example, intercepted was purposely picked as that is more common in football than basketball. The amount of knowledge stored in larger Transformer models continues to impress me. " ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-K2YJJzsVtfq", "outputId": "7a1edf58-15e0-46c8-958e-3a8e6045f802" }, "source": [ "data = [\"Dodgers lose again, give up 3 HRs in a loss to the Giants\",\n", " \"Giants 5 Cardinals 4 final in extra innings\",\n", " \"Dodgers drop Game 2 against the Giants, 5-4\",\n", " \"Flyers 4 Lightning 1 final. 45 saves for the Lightning.\",\n", " \"Slashing, penalty, 2 minute power play coming up\",\n", " \"What a stick save!\",\n", " \"Leads the NFL in sacks with 9.5\",\n", " \"UCF 38 Temple 13\",\n", " \"With the 30 yard completion, down to the 10 yard line\",\n", " \"Drains the 3pt shot!!, 0:15 remaining in the game\",\n", " \"Intercepted! 
Drives down the court and shoots for the win\",\n", " \"Massive dunk!!! they are now up by 15 with 2 minutes to go\"]\n", "\n", "# List of labels\n", "tags = [\"Baseball\", \"Football\", \"Hockey\", \"Basketball\"]\n", "\n", "print(\"%-75s %s\" % (\"Text\", \"Label\"))\n", "print(\"-\" * 100)\n", "\n", "for text in data:\n", " print(\"%-75s %s\" % (text, tags[labels(text, tags)[0][0]]))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Text Label\n", "----------------------------------------------------------------------------------------------------\n", "Dodgers lose again, give up 3 HRs in a loss to the Giants Baseball\n", "Giants 5 Cardinals 4 final in extra innings Baseball\n", "Dodgers drop Game 2 against the Giants, 5-4 Baseball\n", "Flyers 4 Lightning 1 final. 45 saves for the Lightning. Hockey\n", "Slashing, penalty, 2 minute power play coming up Hockey\n", "What a stick save! Hockey\n", "Leads the NFL in sacks with 9.5 Football\n", "UCF 38 Temple 13 Football\n", "With the 30 yard completion, down to the 10 yard line Football\n", "Drains the 3pt shot!!, 0:15 remaining in the game Basketball\n", "Intercepted! Drives down the court and shoots for the win Basketball\n", "Massive dunk!!! they are now up by 15 with 2 minutes to go Basketball\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "t-tGAzCxsHLy" }, "source": [ "# Let's try emoji 😀\n", "\n", "Does the model have knowledge of emoji? Check out the run below, sure looks like it does! Notice the labels are applied based on the perspective from which the information is presented. 
" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "uIf064M9pbjn", "outputId": "1d104014-e9ca-4c89-d259-2b5b231840ad" }, "source": [ "tags = [\"😀\", \"😡\"]\n", "\n", "print(\"%-75s %s\" % (\"Text\", \"Label\"))\n", "print(\"-\" * 100)\n", "\n", "for text in data:\n", " print(\"%-75s %s\" % (text, tags[labels(text, tags)[0][0]]))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Text Label\n", "----------------------------------------------------------------------------------------------------\n", "Dodgers lose again, give up 3 HRs in a loss to the Giants 😡\n", "Giants 5 Cardinals 4 final in extra innings 😀\n", "Dodgers drop Game 2 against the Giants, 5-4 😡\n", "Flyers 4 Lightning 1 final. 45 saves for the Lightning. 😀\n", "Slashing, penalty, 2 minute power play coming up 😡\n", "What a stick save! 😀\n", "Leads the NFL in sacks with 9.5 😀\n", "UCF 38 Temple 13 😀\n", "With the 30 yard completion, down to the 10 yard line 😀\n", "Drains the 3pt shot!!, 0:15 remaining in the game 😀\n", "Intercepted! Drives down the court and shoots for the win 😀\n", "Massive dunk!!! they are now up by 15 with 2 minutes to go 😀\n" ] } ] } ] } ================================================ FILE: examples/08_API_Gallery.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# API Gallery\n", "\n", "The txtai API is a web-based service backed by [FastAPI](https://fastapi.tiangolo.com/). All txtai functionality including similarity search, extractive QA and zero-shot labeling is available via the API.\n", "\n", "This notebook installs the txtai API and shows an example using each of the supported language bindings for txtai." 
] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. Since this notebook uses the API, we need to install the api extras package." ] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[api]" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "-vGR_piwZZO6" }, "source": [ "# Python\n", "\n", "The first method we'll try is direct access via Python. We'll use zero-shot labeling for all the examples here. See [this notebook](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/07_Apply_labels_with_zero_shot_classification.ipynb) for more details on zero-shot classification. " ] }, { "cell_type": "markdown", "metadata": { "id": "P4q72tkRMMkR" }, "source": [ "## Configure Labels instance" ] }, { "cell_type": "code", "metadata": { "id": "8Dy_TJ0iM38Q" }, "source": [ "%%capture\n", "import os\n", "from IPython.core.display import display, HTML\n", "from txtai.pipeline import Labels\n", "\n", "def table(rows):\n", " html = \"\"\"\n", " \n", " \"\"\"\n", "\n", " html += \"\"\n", " for text, label in rows:\n", " html += \"\" % (text, label)\n", " html += \"
TextLabel
%s%s
\"\n", "\n", " display(HTML(html))\n", "\n", "# Create labels model\n", "labels = Labels()" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "L4B73tGkMT6Q" }, "source": [ "## Apply labels to text" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 324 }, "id": "-K2YJJzsVtfq", "outputId": "65782fd8-51fb-4531-8e8b-f28bca678fa0" }, "source": [ "data = [\"Wears a red suit and says ho ho\",\n", " \"Pulls a flying sleigh\",\n", " \"This is cut down and decorated\",\n", " \"Santa puts these under the tree\",\n", " \"Best way to spend the holidays\"]\n", "\n", "# List of labels\n", "tags = [\"🎅 Santa Clause\", \"🦌 Reindeer\", \"🍪 Cookies\", \"🎄 Christmas Tree\", \"🎁 Gifts\", \"👪 Family\"]\n", "\n", "# Render output to table\n", "table([(text, tags[labels(text, tags)[0][0]]) for text in data])" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "
TextLabel
Wears a red suit and says ho ho🎅 Santa Clause
Pulls a flying sleigh🦌 Reindeer
This is cut down and decorated🎄 Christmas Tree
Santa puts these under the tree🎁 Gifts
Best way to spend the holidays👪 Family
" ], "text/plain": [ "" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "metadata": { "id": "UF_bImkLHTMs" }, "source": [ "Once again we see the power of zero-shot labeling. The model wasn't trained on any data specific to this example. Still amazed with how much knowledge is stored in large NLP models." ] }, { "cell_type": "markdown", "metadata": { "id": "PNPJ95cdTKSS" }, "source": [ "# Start an API instance\n", "\n", "Now we'll start an API instance to run the remaining examples. The API needs a configuration file to run. The example below is simplified to only include labeling. See [this link](https://github.com/neuml/txtai#api) for a more detailed configuration example.\n", "\n", "The API instance is started in the background.\n" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nTDwXOUeTH2-", "outputId": "2220a3c9-1cff-4c2f-b21e-13dd2d7cb816" }, "source": [ "%%writefile index.yml\n", "\n", "# Labels settings\n", "labels:" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing index.yml\n" ] } ] }, { "cell_type": "code", "metadata": { "id": "nGITHxUyRzyp" }, "source": [ "!CONFIG=index.yml nohup uvicorn \"txtai.api:app\" &> api.log &\n", "!sleep 90" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "NHvBFZeSd9AG" }, "source": [ "# JavaScript\n", "\n", "txtai.js is available via NPM and can be installed as follows.\n", "\n", "```bash\n", "npm install txtai\n", "```\n", "\n", "For this example, we'll clone the txtai.js project to import the example build configuration." 
] }, { "cell_type": "code", "metadata": { "id": "b52knObEdcCr" }, "source": [ "%%capture\n", "!git clone https://github.com/neuml/txtai.js" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "rUGS0t-JMsS9" }, "source": [ "## Create labels.js\n", "\n", "The following file is a JavaScript version of the labels example." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "zJbKRTSJV-kd", "outputId": "6c111b5d-6e55-4dac-c6c2-0988c2a834da" }, "source": [ "%%writefile txtai.js/examples/node/src/labels.js\n", "import {Labels} from \"txtai\";\n", "import {sprintf} from \"sprintf-js\";\n", "\n", "const run = async () => {\n", " try {\n", " let labels = new Labels(\"http://localhost:8000\");\n", "\n", " let data = [\"Wears a red suit and says ho ho\",\n", " \"Pulls a flying sleigh\",\n", " \"This is cut down and decorated\",\n", " \"Santa puts these under the tree\",\n", " \"Best way to spend the holidays\"];\n", "\n", " // List of labels\n", " let tags = [\"🎅 Santa Clause\", \"🦌 Reindeer\", \"🍪 Cookies\", \"🎄 Christmas Tree\", \"🎁 Gifts\", \"👪 Family\"];\n", "\n", " console.log(sprintf(\"%-40s %s\", \"Text\", \"Label\"));\n", " console.log(\"-\".repeat(75))\n", "\n", " for (let text of data) {\n", " let label = await labels.label(text, tags);\n", " label = tags[label[0].id];\n", "\n", " console.log(sprintf(\"%-40s %s\", text, label));\n", " }\n", " }\n", " catch (e) {\n", " console.trace(e);\n", " }\n", "};\n", "\n", "run();\n" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Overwriting txtai.js/examples/node/src/labels.js\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "nTBs11j-GtD-" }, "source": [ "## Build and run labels example\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "metadata": { "id": "kC5Oub6wa1nK" }, "source": [ "%%capture\n", "os.chdir(\"txtai.js/examples/node\")\n", "!npm install\n", "!npm run build" ], 
"execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ckOHNqyaeL-B", "outputId": "6d8e745c-52d1-4456-fc46-2ff8fda2e675" }, "source": [ "!node dist/labels.js" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Text Label\n", "---------------------------------------------------------------------------\n", "Wears a red suit and says ho ho 🎅 Santa Clause\n", "Pulls a flying sleigh 🦌 Reindeer\n", "This is cut down and decorated 🎄 Christmas Tree\n", "Santa puts these under the tree 🎁 Gifts\n", "Best way to spend the holidays 👪 Family\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "1yukBIMYG5OE" }, "source": [ "The JavaScript program is showing the same results as when natively running through Python!" ] }, { "cell_type": "markdown", "metadata": { "id": "nNiMgvg0p2BG" }, "source": [ "# Java\n", "\n", "txtai.java integrates with standard Java build tools (Gradle, Maven, SBT). The following shows how to add txtai as a dependency to Gradle.\n", "\n", "```gradle\n", "implementation 'com.github.neuml:txtai.java:v4.0.0'\n", "```\n", "\n", "For this example, we'll clone the txtai.java project to import the example build configuration." ] }, { "cell_type": "code", "metadata": { "id": "qs2ai8lhqmga" }, "source": [ "%%capture\n", "os.chdir(\"/content\")\n", "!git clone https://github.com/neuml/txtai.java" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "o8QFvzXkNFgq" }, "source": [ "## Create LabelsDemo.java\n", "\n", "The following file is a Java version of the labels example." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "v73L8Gw0p6fh", "outputId": "a7f797f2-a91f-4033-89c7-4baf76204d93" }, "source": [ "%%writefile txtai.java/examples/src/main/java/LabelsDemo.java\n", "import java.util.Arrays;\n", "import java.util.ArrayList;\n", "import java.util.List;\n", "\n", "import txtai.API.IndexResult;\n", "import txtai.Labels;\n", "\n", "public class LabelsDemo {\n", " public static void main(String[] args) {\n", " try {\n", " Labels labels = new Labels(\"http://localhost:8000\");\n", "\n", " List data = \n", " Arrays.asList(\"Wears a red suit and says ho ho\",\n", " \"Pulls a flying sleigh\",\n", " \"This is cut down and decorated\",\n", " \"Santa puts these under the tree\",\n", " \"Best way to spend the holidays\");\n", "\n", " // List of labels\n", " List tags = Arrays.asList(\"🎅 Santa Clause\", \"🦌 Reindeer\", \"🍪 Cookies\", \"🎄 Christmas Tree\", \"🎁 Gifts\", \"👪 Family\");\n", "\n", " System.out.printf(\"%-40s %s%n\", \"Text\", \"Label\");\n", " System.out.println(new String(new char[75]).replace(\"\\0\", \"-\"));\n", "\n", " for (String text: data) {\n", " List label = labels.label(text, tags);\n", " System.out.printf(\"%-40s %s%n\", text, tags.get(label.get(0).id));\n", " }\n", " }\n", " catch (Exception ex) {\n", " ex.printStackTrace();\n", " }\n", " }\n", "}\n" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Overwriting txtai.java/examples/src/main/java/LabelsDemo.java\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "wZv7eMIOLnRC" }, "source": [ "## Build and run labels example" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "N2Mm3Gl5sH1z", "outputId": "b5249daf-e5a1-4b71-b64c-2b3c6748e846" }, "source": [ "os.chdir(\"txtai.java/examples\")\n", "!../gradlew -q --console=plain labels 2> /dev/null" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": 
"stdout", "text": [ "Text Label\n", "---------------------------------------------------------------------------\n", "Wears a red suit and says ho ho 🎅 Santa Clause\n", "Pulls a flying sleigh 🦌 Reindeer\n", "This is cut down and decorated 🎄 Christmas Tree\n", "Santa puts these under the tree 🎁 Gifts\n", "Best way to spend the holidays 👪 Family\n", "\u001b[m" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "iHpQvUAgNp7j" }, "source": [ "The Java program is showing the same results as when natively running through Python!" ] }, { "cell_type": "markdown", "metadata": { "id": "zU6jK2UL7D5H" }, "source": [ "# Rust\n", "\n", "txtai.rs is available via crates.io and can be installed by adding the following to your cargo.toml file.\n", "\n", "```toml\n", "[dependencies]\n", "txtai = { version = \"4.0\" }\n", "tokio = { version = \"0.2\", features = [\"full\"] }\n", "```\n", "\n", "For this example, we'll clone the txtai.rs project to import the example build configuration. First we need to install Rust." ] }, { "cell_type": "code", "metadata": { "id": "Ob4aswkx7jRh" }, "source": [ "%%capture\n", "os.chdir(\"/content\")\n", "!apt-get install rustc\n", "!git clone https://github.com/neuml/txtai.rs" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "evEQQXBuObZn" }, "source": [ "## Create labels.rs\n", "\n", "The following file is a Rust version of the labels example." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "jjggKnKQ7jQO", "outputId": "76a2b1d9-2889-47b0-a3af-5d71a763bb0b" }, "source": [ "%%writefile txtai.rs/examples/demo/src/labels.rs\n", "use std::error::Error;\n", "\n", "use txtai::labels::Labels;\n", "\n", "pub async fn labels() -> Result<(), Box> {\n", " let labels = Labels::new(\"http://localhost:8000\");\n", "\n", " let data = [\"Wears a red suit and says ho ho\",\n", " \"Pulls a flying sleigh\",\n", " \"This is cut down and decorated\",\n", " \"Santa puts these under the tree\",\n", " \"Best way to spend the holidays\"];\n", "\n", " println!(\"{:<40} {}\", \"Text\", \"Label\");\n", " println!(\"{}\", \"-\".repeat(75));\n", "\n", " for text in data.iter() {\n", " let tags = vec![\"🎅 Santa Clause\", \"🦌 Reindeer\", \"🍪 Cookies\", \"🎄 Christmas Tree\", \"🎁 Gifts\", \"👪 Family\"];\n", " let label = labels.label(text, &tags).await?[0].id;\n", "\n", " println!(\"{:<40} {}\", text, tags[label]);\n", " }\n", "\n", " Ok(())\n", "}" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Overwriting txtai.rs/examples/demo/src/labels.rs\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "gFFPZO8sQZC4" }, "source": [ "## Build and run labels example\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "metadata": { "id": "wuoAidGz9T4g" }, "source": [ "%%capture\n", "os.chdir(\"txtai.rs/examples/demo\")\n", "!cargo build" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-_v_FbL0-yPk", "outputId": "821333f5-5f90-4f89-c2eb-673c2e14e4fe" }, "source": [ "!cargo run labels" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[0m\u001b[0m\u001b[1m\u001b[32m Finished\u001b[0m dev [unoptimized + debuginfo] target(s) in 0.07s\n", "\u001b[0m\u001b[0m\u001b[1m\u001b[32m Running\u001b[0m 
`target/debug/demo labels`\n", "Text Label\n", "---------------------------------------------------------------------------\n", "Wears a red suit and says ho ho 🎅 Santa Clause\n", "Pulls a flying sleigh 🦌 Reindeer\n", "This is cut down and decorated 🎄 Christmas Tree\n", "Santa puts these under the tree 🎁 Gifts\n", "Best way to spend the holidays 👪 Family\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "kDmS89TPS3kb" }, "source": [ "The Rust program is showing the same results as when natively running through Python!" ] }, { "cell_type": "markdown", "metadata": { "id": "ezznN4I8_CCQ" }, "source": [ "# Go\n", "\n", "txtai.go can be installed by adding the following import statement. When using modules, txtai.go will automatically be installed. Otherwise use `go get`.\n", "\n", "```golang\n", "import \"github.com/neuml/txtai.go\"\n", "```\n", "\n", "For this example, we'll create a standalone process for labeling. First we need to install Go." ] }, { "cell_type": "code", "metadata": { "id": "b-b6fhLQ_DpQ" }, "source": [ "%%capture\n", "os.chdir(\"/content\")\n", "!apt install golang-go\n", "!go get \"github.com/neuml/txtai.go\"" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "Dw-I6jOGR6vA" }, "source": [ "## Create labels.go\n", "\n", "The following file is a Go version of the labels example." 
] }, { "cell_type": "code", "metadata": { "id": "bLBJwkN4ANpi", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "883ea7b2-2fbc-471c-e0bb-59ef5172a6a4" }, "source": [ "%%writefile labels.go\n", "package main\n", "\n", "import (\n", "\t\"fmt\"\n", "\t\"strings\"\n", "\t\"github.com/neuml/txtai.go\"\n", ")\n", "\n", "func main() {\n", "\tlabels := txtai.Labels(\"http://localhost:8000\")\n", "\n", "\tdata := []string{\"Wears a red suit and says ho ho\",\n", " \"Pulls a flying sleigh\",\n", " \"This is cut down and decorated\",\n", " \"Santa puts these under the tree\",\n", " \"Best way to spend the holidays\"}\n", "\n", "\t// List of labels\n", "\ttags := []string{\"🎅 Santa Clause\", \"🦌 Reindeer\", \"🍪 Cookies\", \"🎄 Christmas Tree\", \"🎁 Gifts\", \"👪 Family\"}\n", "\n", "\tfmt.Printf(\"%-40s %s\\n\", \"Text\", \"Label\")\n", "\tfmt.Println(strings.Repeat(\"-\", 75))\n", "\n", "\tfor _, text := range data {\n", "\t\tlabel := labels.Label(text, tags)\n", "\t\tfmt.Printf(\"%-40s %s\\n\", text, tags[label[0].Id])\n", "\t}\n", "}" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing labels.go\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "PJ2XzzDbSeZh" }, "source": [ "## Build and run labels example\n" ] }, { "cell_type": "code", "metadata": { "id": "l1xnUbtdAy0p", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "5bc6015c-5c9c-4d8a-daf7-6897ec6cbd80" }, "source": [ "!go run labels.go" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Text Label\n", "---------------------------------------------------------------------------\n", "Wears a red suit and says ho ho 🎅 Santa Clause\n", "Pulls a flying sleigh 🦌 Reindeer\n", "This is cut down and decorated 🎄 Christmas Tree\n", "Santa puts these under the tree 🎁 Gifts\n", "Best way to spend the holidays 👪 Family\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "oml43X5eS6YB" }, 
"source": [ "The Go program is showing the same results as when natively running through Python!" ] } ] } ================================================ FILE: examples/09_Building_abstractive_text_summaries.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Building abstractive text summaries\n", "\n", "In the field of text summarization, there are two primary categories of summarization, extractive and abstractive summarization.\n", "\n", "Extractive summarization takes subsections of the text and joins them together to form a summary. This is commonly backed by graph algorithms like TextRank to find the sections/sentences with the most commonality. These summaries can be highly effective but they are unable to transform text and don't have a contextual understanding.\n", "\n", "Abstractive summarization uses Natural Language Processing (NLP) models to build transformative summaries of text. This is similar to having a human read an article and asking what it was about. A human wouldn't just give a verbose reading of the text. This notebook shows how blocks of text can be summarized using an abstractive summarization pipeline. " ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. Since this notebook is using optional pipelines, we need to install the pipeline extras package." 
] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline]" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "PNPJ95cdTKSS" }, "source": [ "# Create a Summary instance\n", "\n", "The Summary instance is the main entrypoint for text summarization. This is a light-weight wrapper around the summarization pipeline in Hugging Face Transformers.\n", "\n", "In addition to the default model, additional models can be found on the [Hugging Face model hub](https://huggingface.co/models?pipeline_tag=summarization).\n" ] }, { "cell_type": "code", "metadata": { "id": "nTDwXOUeTH2-" }, "source": [ "%%capture\n", "\n", "from txtai.pipeline import Summary\n", "\n", "# Create summary model\n", "summary = Summary()" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "-vGR_piwZZO6" }, "source": [ "# Summarize text\n", "\n", "The example below shows how a large block of text can be distilled down into a smaller summary." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 36 }, "id": "-K2YJJzsVtfq", "outputId": "cdf54f20-72ad-4f65-bc17-100e32e6cc71" }, "source": [ "text = (\"Search is the base of many applications. Once data starts to pile up, users want to be able to find it. It’s the foundation \"\n", " \"of the internet and an ever-growing challenge that is never solved or done. The field of Natural Language Processing (NLP) is \"\n", " \"rapidly evolving with a number of new developments. Large-scale general language models are an exciting new capability \"\n", " \"allowing us to add amazing functionality quickly with limited compute and people. Innovation continues with new models \"\n", " \"and advancements coming in at what seems a weekly basis. 
This article introduces txtai, an AI-powered search engine \"\n", " \"that enables Natural Language Understanding (NLU) based search in any application.\"\n", ")\n", "\n", "summary(text, maxlength=10)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'Search is the foundation of the internet'" ] }, "metadata": {}, "execution_count": 3 } ] }, { "cell_type": "markdown", "metadata": { "id": "n2jndgE-JyWX" }, "source": [ "Notice how the summarizer built a sentence using parts of the document above. It takes a basic understanding of language in order to understand the first two sentences and how to combine them into a single transformative sentence." ] }, { "cell_type": "markdown", "metadata": { "id": "27PneZxQx7NR" }, "source": [ "# Summarize a document\n", "\n", "The next section retrieves an article, extracts text from it (more to come on this topic) and summarizes that text." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 53 }, "id": "idPThgJGvIju", "outputId": "7d0580e6-2531-48c9-a32a-481ccf32900d" }, "source": [ "!wget -q \"https://medium.com/neuml/time-lapse-video-for-the-web-a7d8874ff397\"\n", "\n", "from txtai.pipeline import Textractor\n", "\n", "textractor = Textractor()\n", "text = textractor(\"time-lapse-video-for-the-web-a7d8874ff397\")\n", "\n", "summary(text)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'Time-lapse video is a popular way to show an area or event over a long period of time. The same concept can be applied to a dynamic real-time website with frequently updated data. webelapse is an open source project developed to provide this functionality. 
It can be used as is or modified for different use cases.'" ] }, "metadata": {}, "execution_count": 4 } ] }, { "cell_type": "markdown", "metadata": { "id": "a63k89aDyKTW" }, "source": [ "Click through the link to see the full article. This summary does a pretty good job of covering what the article is about!" ] } ] } ================================================ FILE: examples/10_Extract_text_from_documents.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Extract text from documents\n", "\n", "Up to this point, all the examples have been working with sections of text, which have already been split through some other means. What happens if we're working with documents? First we need to get the text out of these documents, then figure out how to index to best support vector search.\n", "\n", "This notebook shows how documents can have text extracted and split to support vector search and retrieval augmented generation (RAG)." ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. Since this notebook is using optional pipelines, we need to install the pipeline extras package." 
] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline]\n", "\n", "# Get test data\n", "!wget -N https://github.com/neuml/txtai/releases/download/v6.2.0/tests.tar.gz\n", "!tar -xvzf tests.tar.gz\n", "\n", "# Install NLTK\n", "import nltk\n", "nltk.download(['punkt', 'punkt_tab'])" ], "execution_count": 19, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "PNPJ95cdTKSS" }, "source": [ "# Create a Textractor instance\n", "\n", "The Textractor instance is the main entrypoint for extracting text. This method is backed by Apache Tika, a robust text extraction library written in Java. [Apache Tika](https://tika.apache.org/0.9/formats.html) has support for a large number of file formats: PDF, Word, Excel, HTML and others. The [Python Tika package](https://github.com/chrismattmann/tika-python) automatically installs Tika and starts a local REST API instance used to read extracted data.\n", "\n", "*Note: This requires Java to be installed locally.*" ] }, { "cell_type": "code", "metadata": { "id": "nTDwXOUeTH2-" }, "source": [ "%%capture\n", "\n", "from txtai.pipeline import Textractor\n", "\n", "# Create textractor model\n", "textractor = Textractor()" ], "execution_count": 20, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "-vGR_piwZZO6" }, "source": [ "# Extract text\n", "\n", "The example below shows how to extract text from a file." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 118 }, "id": "-K2YJJzsVtfq", "outputId": "7754c508-264a-41fa-9843-83460719820f" }, "source": [ "textractor(\"txtai/article.pdf\")" ], "execution_count": 21, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'Introducing txtai, an AI-powered search engine \\nbuilt on Transformers\\n\\nAdd Natural Language Understanding to any application\\n\\nSearch is the base of many applications. 
Once data starts to pile up, users want to be able to find it. It’s \\nthe foundation of the internet and an ever-growing challenge that is never solved or done.\\n\\nThe field of Natural Language Processing (NLP) is rapidly evolving with a number of new \\ndevelopments. Large-scale general language models are an exciting new capability allowing us to add \\namazing functionality quickly with limited compute and people. Innovation continues with new models\\nand advancements coming in at what seems a weekly basis.\\n\\nThis article introduces txtai, an AI-powered search engine that enables Natural Language \\nUnderstanding (NLU) based search in any application.\\n\\nIntroducing txtai\\ntxtai builds an AI-powered index over sections of text. txtai supports building text indices to perform \\nsimilarity searches and create extractive question-answering based systems. txtai also has functionality \\nfor zero-shot classification. txtai is open source and available on GitHub.\\n\\ntxtai and/or the concepts behind it has already been used to power the Natural Language Processing \\n(NLP) applications listed below:\\n\\n• paperai — AI-powered literature discovery and review engine for medical/scientific papers\\n• tldrstory — AI-powered understanding of headlines and story text\\n• neuspo — Fact-driven, real-time sports event and news site\\n• codequestion — Ask coding questions directly from the terminal\\n\\nBuild an Embeddings index\\nFor small lists of texts, the method above works. But for larger repositories of documents, it doesn’t \\nmake sense to tokenize and convert all embeddings for each query. txtai supports building pre-\\ncomputed indices which significantly improves performance.\\n\\nBuilding on the previous example, the following example runs an index method to build and store the \\ntext embeddings. 
In this case, only the query is converted to an embeddings vector each search.\\n\\nhttps://github.com/neuml/codequestion\\nhttps://neuspo.com/\\nhttps://github.com/neuml/tldrstory\\nhttps://github.com/neuml/paperai\\n - Introducing txtai, an AI-powered search engine built on Transformers\\n - Add Natural Language Understanding to any application\\n - Introducing txtai\\n - Build an Embeddings index'" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 21 } ] }, { "cell_type": "markdown", "metadata": { "id": "n2jndgE-JyWX" }, "source": [ "Note that the text from the article was extracted into a single string. Depending on the articles, this may be acceptable. For long articles, often you'll want to split the content into logical sections to build better downstream vectors." ] }, { "cell_type": "markdown", "metadata": { "id": "1w2bhBCPOUdu" }, "source": [ "# Extract sentences\n", "\n", "Sentence extraction uses a model that specializes in sentence detection. This call returns a list of sentences." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "PKZVK5vuOTqB", "outputId": "a31e182e-037b-4e29-c1b0-0f6815e3b2c9" }, "source": [ "textractor = Textractor(sentences=True)\n", "textractor(\"txtai/article.pdf\")" ], "execution_count": 22, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['Introducing txtai, an AI-powered search engine \\nbuilt on Transformers\\n\\nAdd Natural Language Understanding to any application\\n\\nSearch is the base of many applications.',\n", " 'Once data starts to pile up, users want to be able to find it.',\n", " 'It’s \\nthe foundation of the internet and an ever-growing challenge that is never solved or done.',\n", " 'The field of Natural Language Processing (NLP) is rapidly evolving with a number of new \\ndevelopments.',\n", " 'Large-scale general language models are an exciting new capability allowing us to add \\namazing functionality quickly with limited compute and people.',\n", " 'Innovation continues with new models\\nand advancements coming in at what seems a weekly basis.',\n", " 'This article introduces txtai, an AI-powered search engine that enables Natural Language \\nUnderstanding (NLU) based search in any application.',\n", " 'Introducing txtai\\ntxtai builds an AI-powered index over sections of text.',\n", " 'txtai supports building text indices to perform \\nsimilarity searches and create extractive question-answering based systems.',\n", " 'txtai also has functionality \\nfor zero-shot classification.',\n", " 'txtai is open source and available on GitHub.',\n", " 'txtai and/or the concepts behind it has already been used to power the Natural Language Processing \\n(NLP) applications listed below:\\n\\n• paperai — AI-powered literature discovery and review engine for medical/scientific papers\\n• tldrstory — AI-powered understanding of headlines and story text\\n• neuspo — Fact-driven, real-time sports event and news site\\n• codequestion — Ask 
coding questions directly from the terminal\\n\\nBuild an Embeddings index\\nFor small lists of texts, the method above works.',\n", " 'But for larger repositories of documents, it doesn’t \\nmake sense to tokenize and convert all embeddings for each query.',\n", " 'txtai supports building pre-\\ncomputed indices which significantly improves performance.',\n", " 'Building on the previous example, the following example runs an index method to build and store the \\ntext embeddings.',\n", " 'In this case, only the query is converted to an embeddings vector each search.',\n", " 'https://github.com/neuml/codequestion\\nhttps://neuspo.com/\\nhttps://github.com/neuml/tldrstory\\nhttps://github.com/neuml/paperai\\n - Introducing txtai, an AI-powered search engine built on Transformers\\n - Add Natural Language Understanding to any application\\n - Introducing txtai\\n - Build an Embeddings index']" ] }, "metadata": {}, "execution_count": 22 } ] }, { "cell_type": "markdown", "metadata": { "id": "vdVCCc9UOv5S" }, "source": [ "Now the document is split up at the sentence level. These sentences can be fed to a workflow that adds each sentence to an embeddings index. Depending on the task, this may work well. Alternatively, it may be even better to split at the paragraph level." ] }, { "cell_type": "markdown", "metadata": { "id": "z1H8XYkaSoP4" }, "source": [ "# Extract paragraphs\n", "\n", "Paragraph detection looks for consecutive newlines. This call returns a list of paragraphs." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "9VUito4ISoAe", "outputId": "08079380-a7c8-4886-ecc3-de9f02be4584" }, "source": [ "textractor = Textractor(paragraphs=True)\n", "for paragraph in textractor(\"txtai/article.pdf\"):\n", " print(paragraph, \"\\n----\")" ], "execution_count": 23, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Introducing txtai, an AI-powered search engine \n", "built on Transformers \n", "----\n", "Add Natural Language Understanding to any application \n", "----\n", "Search is the base of many applications. Once data starts to pile up, users want to be able to find it. It’s \n", "the foundation of the internet and an ever-growing challenge that is never solved or done. \n", "----\n", "The field of Natural Language Processing (NLP) is rapidly evolving with a number of new \n", "developments. Large-scale general language models are an exciting new capability allowing us to add \n", "amazing functionality quickly with limited compute and people. Innovation continues with new models\n", "and advancements coming in at what seems a weekly basis. \n", "----\n", "This article introduces txtai, an AI-powered search engine that enables Natural Language \n", "Understanding (NLU) based search in any application. \n", "----\n", "Introducing txtai\n", "txtai builds an AI-powered index over sections of text. txtai supports building text indices to perform \n", "similarity searches and create extractive question-answering based systems. txtai also has functionality \n", "for zero-shot classification. txtai is open source and available on GitHub. 
\n", "----\n", "txtai and/or the concepts behind it has already been used to power the Natural Language Processing \n", "(NLP) applications listed below: \n", "----\n", "• paperai — AI-powered literature discovery and review engine for medical/scientific papers\n", "• tldrstory — AI-powered understanding of headlines and story text\n", "• neuspo — Fact-driven, real-time sports event and news site\n", "• codequestion — Ask coding questions directly from the terminal \n", "----\n", "Build an Embeddings index\n", "For small lists of texts, the method above works. But for larger repositories of documents, it doesn’t \n", "make sense to tokenize and convert all embeddings for each query. txtai supports building pre-\n", "computed indices which significantly improves performance. \n", "----\n", "Building on the previous example, the following example runs an index method to build and store the \n", "text embeddings. In this case, only the query is converted to an embeddings vector each search. \n", "----\n", "https://github.com/neuml/codequestion\n", "https://neuspo.com/\n", "https://github.com/neuml/tldrstory\n", "https://github.com/neuml/paperai\n", " - Introducing txtai, an AI-powered search engine built on Transformers\n", " - Add Natural Language Understanding to any application\n", " - Introducing txtai\n", " - Build an Embeddings index \n", "----\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Extract sections\n", "\n", "Section extraction is format dependent. If page breaks are available, each section is a page. Otherwise, this call returns logical sections, such as those defined by headings." 
], "metadata": { "id": "Ae6dRQ2LvN-w" } }, { "cell_type": "code", "source": [ "textractor = Textractor(sections=True)\n", "print(\"\\n[PAGE BREAK]\\n\".join(section for section in textractor(\"txtai/article.pdf\")))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nQ6ev2UMwqnh", "outputId": "3d45491d-3547-4218-d30c-ba0c4d161256" }, "execution_count": 24, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Introducing txtai, an AI-powered search engine \n", "built on Transformers\n", "\n", "Add Natural Language Understanding to any application\n", "\n", "Search is the base of many applications. Once data starts to pile up, users want to be able to find it. It’s \n", "the foundation of the internet and an ever-growing challenge that is never solved or done.\n", "\n", "The field of Natural Language Processing (NLP) is rapidly evolving with a number of new \n", "developments. Large-scale general language models are an exciting new capability allowing us to add \n", "amazing functionality quickly with limited compute and people. Innovation continues with new models\n", "and advancements coming in at what seems a weekly basis.\n", "\n", "This article introduces txtai, an AI-powered search engine that enables Natural Language \n", "Understanding (NLU) based search in any application.\n", "\n", "Introducing txtai\n", "txtai builds an AI-powered index over sections of text. txtai supports building text indices to perform \n", "similarity searches and create extractive question-answering based systems. txtai also has functionality \n", "for zero-shot classification. 
txtai is open source and available on GitHub.\n", "\n", "txtai and/or the concepts behind it has already been used to power the Natural Language Processing \n", "(NLP) applications listed below:\n", "\n", "• paperai — AI-powered literature discovery and review engine for medical/scientific papers\n", "• tldrstory — AI-powered understanding of headlines and story text\n", "• neuspo — Fact-driven, real-time sports event and news site\n", "• codequestion — Ask coding questions directly from the terminal\n", "\n", "Build an Embeddings index\n", "For small lists of texts, the method above works. But for larger repositories of documents, it doesn’t \n", "make sense to tokenize and convert all embeddings for each query. txtai supports building pre-\n", "computed indices which significantly improves performance.\n", "\n", "Building on the previous example, the following example runs an index method to build and store the \n", "text embeddings. In this case, only the query is converted to an embeddings vector each search.\n", "\n", "https://github.com/neuml/codequestion\n", "https://neuspo.com/\n", "https://github.com/neuml/tldrstory\n", "https://github.com/neuml/paperai\n", "[PAGE BREAK]\n", "- Introducing txtai, an AI-powered search engine built on Transformers\n", " - Add Natural Language Understanding to any application\n", " - Introducing txtai\n", " - Build an Embeddings index\n" ] } ] } ] } ================================================ FILE: examples/11_Transcribe_audio_to_text.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Transcribe audio to text\n", "\n", "This notebook covers the transcription of audio files to text using models provided by Hugging Face." 
] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. Since this notebook is using optional pipelines, we need to install the pipeline extras package. We'll also demonstrate running this pipeline through the API." ] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[api,pipeline]\n", "\n", "# Get test data\n", "!wget -N https://github.com/neuml/txtai/releases/download/v3.5.0/tests.tar.gz\n", "!tar -xvzf tests.tar.gz" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "PNPJ95cdTKSS" }, "source": [ "# Create a Transcription instance\n", "\n", "The Transcription instance is the main entrypoint for transcribing audio to text. The pipeline abstracts transcribing audio into a one line call! \n", "\n", "The pipeline executes logic to read audio files into memory, run the data through a machine learning model and output the results to text.\n", "\n" ] }, { "cell_type": "code", "metadata": { "id": "nTDwXOUeTH2-" }, "source": [ "%%capture\n", "\n", "from txtai.pipeline import Transcription\n", "\n", "# Create transcription model\n", "transcribe = Transcription()" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "-vGR_piwZZO6" }, "source": [ "# Transcribe audio to text\n", "\n", "The example below shows how to transcribe a list of audio files to text. Let's transcribe audio to text and look at each result." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 497 }, "id": "-K2YJJzsVtfq", "outputId": "7f2fe352-de55-428e-f15a-28f389498961" }, "source": [ "from IPython.display import Audio, display\n", "\n", "files = [\"Beijing_mobilises.wav\", \"Canadas_last_fully.wav\", \"Maine_man_wins_1_mil.wav\", \"Make_huge_profits.wav\", \"The_National_Park.wav\", \"US_tops_5_million.wav\"]\n", "files = [\"txtai/%s\" % x for x in files]\n", "\n", "for x, text in enumerate(transcribe(files)):\n", " display(Audio(files[x]))\n", " print(text)\n", " print()\n" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Baging mobilizes invasion kraft along coast as tie one tensions escalates\n", "\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Canodas last fully intact ice shelf has suddenly collapsed forming a manhattan sized iceberge\n", "\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Main man wins from lottery ticket\n", "\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Make huge profits without working make up to one hundred thousand dollars a day\n", "\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "National park service warns against sacrificing slower friends in a bare attack\n", "\n" ] }, { "output_type": "display_data", "data": { 
"text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Ues virus cases top a million\n", "\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "Xn3SlVE1LYvm" }, "source": [ "Overall, the results are solid. Each result sounds phonetically like the audio." ] }, { "cell_type": "markdown", "source": [ "# OpenAI Whisper\n", "\n", "In September 2022, [OpenAI Whisper](https://github.com/openai/whisper) was released. This model brings a dramatic improvement in transcription quality. Whisper support was added to Hugging Face Transformers in v4.23.0. Let's give it a try." ], "metadata": { "id": "bDxW-tsCELob" } }, { "cell_type": "code", "source": [ "# Transcribe files\n", "transcribe = Transcription(\"openai/whisper-base\")\n", "for text in transcribe(files):\n", " print(text)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-KgYwAQzFVll", "outputId": "7b1e9541-1a6f-4814-ae14-21f4bd9794e7" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Beijing mobilizes invasion craft along coast as Taiwan tensions escalate.\n", "Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan sized iceberg.\n", "Maine Man wins from lottery ticket.\n", "make huge profits without working. Make up to $100,000 a day.\n", "National Park Service warns against sacrificing slower friends in a bear attack.\n", "U.S. virus cases top of million.\n" ] } ] }, { "cell_type": "markdown", "source": [ "Results were transcribed with near perfect accuracy, amazing!\n", "\n", "This can also be run as a txtai application or API instance. Let's try a full indexing workflow with a txtai application." 
], "metadata": { "id": "fgMr3B2_IoeN" } }, { "cell_type": "code", "source": [ "%%writefile workflow.yml\n", "writable: true\n", "\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", " content: true\n", "\n", "transcription:\n", " path: openai/whisper-base\n", "\n", "workflow:\n", " index:\n", " tasks:\n", " - transcription\n", " - index" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "k3Rb2OU9Mq3h", "outputId": "5e40ecd5-3df1-4864-c7a3-3e26bd087415" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Overwriting workflow.yml\n" ] } ] }, { "cell_type": "code", "source": [ "from txtai.app import Application\n", "\n", "app = Application(\"workflow.yml\")\n", "\n", "list(app.workflow(\"index\", files))\n", "app.search(\"feel good story\", 1)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "cj49U095IxTc", "outputId": "a55b8717-f5fc-4e8f-ef1b-5fecc3f8200a" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': '2',\n", " 'text': 'Maine Man wins from lottery ticket.',\n", " 'score': 0.1285860687494278}]" ] }, "metadata": {}, "execution_count": 26 } ] }, { "cell_type": "markdown", "source": [ "This workflow transcribed the input files, loaded the transcriptions into an embeddings index and finally ran a search. Last thing we'll do is run the workflow as an API instance." 
], "metadata": { "id": "k6ptCiEyR8p2" } }, { "cell_type": "code", "source": [ "!CONFIG=workflow.yml uvicorn \"txtai.api:app\" &> api.log &\n", "!sleep 30\n", "\n", "# Run indexing workflow\n", "!curl -s -o /dev/null \\\n", " -X POST \"http://localhost:8000/workflow\" \\\n", " -H \"Content-Type: application/json\" \\\n", " -d '{\"name\":\"index\", \"elements\":[\"txtai/Beijing_mobilises.wav\", \"txtai/Canadas_last_fully.wav\", \"txtai/Maine_man_wins_1_mil.wav\", \"txtai/Make_huge_profits.wav\", \"txtai/The_National_Park.wav\", \"txtai/US_tops_5_million.wav\"]}'\n", "\n", "# Test API search\n", "!curl \"http://localhost:8000/search?query=feel+good+story&limit=1\"" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "XDpqwf8PNHNo", "outputId": "7ec28122-69a3-47cc-d2ed-acaa186e4aa1" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[{\"id\":\"2\",\"text\":\"Maine Man wins from lottery ticket.\",\"score\":0.1285860687494278}]" ] } ] }, { "cell_type": "markdown", "source": [ "Once again, the same results as in Python and with an application." ], "metadata": { "id": "jfdVtwbBIN4Q" } }, { "cell_type": "markdown", "source": [ "# Wrapping up\n", "\n", "There is a lot of development in the audio transcription space. In only a couple of lines of code, high-quality transcription models are now readily available!" ], "metadata": { "id": "VCU8zGGDXQ0Y" } } ] } ================================================ FILE: examples/12_Translate_text_between_languages.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Translate text between languages\n", "\n", "This notebook covers machine translation backed by Hugging Face models. 
The quality of machine translation via cloud services has come a very long way and produces high quality results. This notebook shows how the models from Hugging Face give developers a reasonable alternative for local machine translation." ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. Since this notebook is using optional pipelines, we need to install the pipeline extras package." ] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline]" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "PNPJ95cdTKSS" }, "source": [ "# Create a Translation instance\n", "\n", "The Translation instance is the main entrypoint for translating text between languages. The pipeline abstracts translating text into a one line call! \n", "\n", "The pipeline has logic to detect the input language, load the relevant model that handles translating from source to target language and return results. The translation pipeline also has built-in logic to handle splitting large text blocks into smaller sections the models can handle.\n", "\n" ] }, { "cell_type": "code", "metadata": { "id": "nTDwXOUeTH2-" }, "source": [ "%%capture\n", "\n", "from txtai.pipeline import Translation\n", "\n", "# Create translation model\n", "translate = Translation()" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "-vGR_piwZZO6" }, "source": [ "# Translate text\n", "\n", "The example below shows how to translate text from English to Spanish. This text is then translated back to English." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 36 }, "id": "-K2YJJzsVtfq", "outputId": "44df5404-ea14-4746-fc8b-a2e205bd9466" }, "source": [ "translation = translate(\"This is a test translation into Spanish\", \"es\")\n", "translation" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'Esta es una traducción de prueba al español'" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 16 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 36 }, "id": "K_UnAZQpetM8", "outputId": "46c9c68c-ddcf-4f55-bac3-89f25931e91b" }, "source": [ "translate(translation, \"en\")" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'This is a test translation into Spanish'" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 17 } ] }, { "cell_type": "markdown", "metadata": { "id": "4cSI8GdtjhEM" }, "source": [ "# Translating multiple languages in a single call\n", "\n", "The section below translates a single English sentence into 5 different languages. The results are then passed to a single translation call to translate back into English. The pipeline detects each input language and is able to load the relevant translation models." 
] }, { "cell_type": "code", "metadata": { "id": "8jLxGtwNf0Aj", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "47040b2f-f6e5-482a-df81-7b758f47a7d5" }, "source": [ "def run():\n", " languages = [\"fr\", \"es\", \"de\", \"hi\", \"ja\"]\n", " translations = [translate(\"The sky is blue, the stars are far\", language) for language in languages]\n", " english = translate(translations, \"en\")\n", "\n", " for x, text in enumerate(translations):\n", " print(\"Original Language: %s\" % languages[x])\n", " print(\"Translation: %s\" % text)\n", " print(\"Back to English: %s\" % english[x])\n", " print()\n", "\n", "# Run multiple translations\n", "run()" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Original Language: fr\n", "Translation: Le ciel est bleu, les étoiles sont loin\n", "Back to English: The sky is blue, the stars are far away\n", "\n", "Original Language: es\n", "Translation: El cielo es azul, las estrellas están lejos.\n", "Back to English: The sky is blue, the stars are far away.\n", "\n", "Original Language: de\n", "Translation: Der Himmel ist blau, die Sterne sind weit\n", "Back to English: The sky is blue, the stars are wide\n", "\n", "Original Language: hi\n", "Translation: आकाश नीला है, तारे दूर हैं\n", "Back to English: Sky is blue, stars are away\n", "\n", "Original Language: ja\n", "Translation: 天は青い、星は遠い。\n", "Back to English: The heavens are blue and the stars are far away.\n", "\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "Xn3SlVE1LYvm" }, "source": [ "The translation quality overall is very high!" ] }, { "cell_type": "markdown", "source": [ "# Additional model types\n", "\n", "The translation pipeline is flexible and supports multiple model types. The default mode for the pipeline is to scan the Hugging Face Hub for models that best match the source-target translation pair. 
This often produces the best quality and is usually a smaller model than a large multi-language model.\n", "\n", "There is a parameter that can override this and always use the base model." ], "metadata": { "id": "3FdS5slz60eA" } }, { "cell_type": "code", "source": [ "translate = Translation(\"t5-small\", findmodels=False)\n", "translate(\"translate English to French: The sky is blue, the stars are far\", None)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 36 }, "id": "REb0X2Kz60Ew", "outputId": "ae13d4d0-318f-4740-f725-0029ceeabeac" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'Le ciel est bleu, les étoiles sont loin'" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 11 } ] }, { "cell_type": "markdown", "source": [ "Translation isn't limited to spoken languages. txtai provides a text-to-sql model that converts English text into a txtai-compatible SQL statement. " ], "metadata": { "id": "5vSJbW7zVJeH" } }, { "cell_type": "code", "source": [ "translate = Translation(\"NeuML/t5-small-txtsql\", findmodels=False)\n", "translate(\"translate English to SQL: feel good story since yesterday\", None)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 36 }, "id": "d5usam_jhKaz", "outputId": "c12368a0-ea26-4191-be5a-f2de70711003" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "\"select id, text, score from txtai where similar('feel good story') and entry >= date('now', '-1 day')\"" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 12 } ] }, { "cell_type": "markdown", "source": [ "Last thing we'll do is run the multiple language example only using a single large language model." 
], "metadata": { "id": "cVb7uk7TXr_v" } }, { "cell_type": "code", "source": [ "translate = Translation(\"facebook/mbart-large-50-many-to-many-mmt\", findmodels=False)\n", "run()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JkOLnvKZWI95", "outputId": "fef6402c-e20e-43e1-a3eb-e4580913fa7e" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Original Language: fr\n", "Translation: Le ciel est bleu, les étoiles sont loin\n", "Back to English: The sky is blue, the stars are far away\n", "\n", "Original Language: es\n", "Translation: El cielo es azul, las estrellas están lejos.\n", "Back to English: The sky is blue, the stars are far away.\n", "\n", "Original Language: de\n", "Translation: Der Himmel ist blau, die Sterne sind weit\n", "Back to English: The sky is blue, the stars are far.\n", "\n", "Original Language: hi\n", "Translation: आकाश नीली है, तारे दूर हैं।\n", "Back to English: The sky is blue, and the stars are far away.\n", "\n", "Original Language: ja\n", "Translation: 空は青い、星は遠い\n", "Back to English: the sky is blue, the stars are far away.\n", "\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Wrapping up\n", "\n", "Machine translation has made giant leaps and strides over the last couple of years. These models give developers a solid, locally-hosted alternative to cloud translation services. Additionally, there are models built for low resource languages that cloud translation services don't support.\n", "\n", "A number of different models and configurations are supported, give it a try!" 
], "metadata": { "id": "TvWx9PS-X32c" } } ] } ================================================ FILE: examples/13_Similarity_search_with_images.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Similarity search with images\n", "\n", "txtai as the name implies works with text and ai, pretty straightforward. But that doesn't mean it can't work with different types of content. For example, an image can be described with words. We can use that description to compare an image to a query or other documents. This notebook shows how images and text can be embedded into the same space to support similarity search." ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. Since this notebook uses sentence-transformers directly, we need to install the similarity extras package." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "XMQuuun2R06J" }, "outputs": [], "source": [ "%%capture\n", "!pip install torchvision ipyplot git+https://github.com/neuml/txtai#egg=txtai[similarity]\n", "\n", "# Get test data\n", "!wget -N https://github.com/neuml/txtai/releases/download/v3.5.0/tests.tar.gz\n", "!tar -xvzf tests.tar.gz" ] }, { "cell_type": "markdown", "metadata": { "id": "PNPJ95cdTKSS" }, "source": [ "# Create an Embeddings model\n", "\n", "[sentence-transformers](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search) has support for the [OpenAI CLIP model](https://github.com/openai/CLIP). This model embeds text and images into the same space, enabling image similarity search. txtai can directly utilize these models through sentence-transformers. 
Check out the sentence-transformers link above for additional examples on how to use this model.\n", "\n", "This section builds an embeddings index over a series of images.\n", "\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "nTDwXOUeTH2-" }, "outputs": [], "source": [ "%%capture\n", "\n", "import glob\n", "\n", "from PIL import Image\n", "\n", "from txtai.embeddings import Embeddings\n", "from txtai.pipeline import Caption\n", "\n", "def images():\n", " # Create image caption pipeline\n", " caption = Caption()\n", "\n", " for path in glob.glob('txtai/*jpg'):\n", " # Add image object along with image metadata\n", " image = Image.open(path)\n", "\n", " yield (path, {\"object\": image, \"format\": image.format, \"width\": image.width, \"height\": image.height, \"caption\": caption(image)}, None)\n", "\n", "# Index with content and objects\n", "embeddings = Embeddings({\"method\": \"sentence-transformers\", \"path\": \"sentence-transformers/clip-ViT-B-32\", \"content\": True, \"objects\": \"image\"})\n", "embeddings.index(images())" ] }, { "cell_type": "markdown", "metadata": { "id": "PTZbRHiE5_l3" }, "source": [ "Next let's query and see what's available in the index." 
] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "040r95YG1w3J", "outputId": "65a2b2b2-6153-4f3d-e32c-5e4e661956ab" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': 'txtai/books.jpg',\n", " 'object': ,\n", " 'format': 'JPEG',\n", " 'width': 1024,\n", " 'height': 682,\n", " 'caption': 'a book shelf filled with books and a stack of books'},\n", " {'id': 'txtai/buildings.jpg',\n", " 'object': ,\n", " 'format': 'JPEG',\n", " 'width': 700,\n", " 'height': 466,\n", " 'caption': 'a city skyline with buildings and a sky background'},\n", " {'id': 'txtai/chop.jpg',\n", " 'object': ,\n", " 'format': 'JPEG',\n", " 'width': 700,\n", " 'height': 466,\n", " 'caption': 'a tree branch with a person holding a stick'}]" ] }, "metadata": {}, "execution_count": 9 } ], "source": [ "embeddings.search(\"select id, object, format, width, height, caption from txtai\")" ] }, { "cell_type": "markdown", "metadata": { "id": "r5GjmdCA6IPJ" }, "source": [ "The query above shows the metadata that was added in addition to the image object. These fields can be retrieved on search and/or used to filter results." ] }, { "cell_type": "markdown", "metadata": { "id": "HLf0KrXwLH5-" }, "source": [ "# Search the index\n", "\n", "Now that we have an index, let's search it! This section runs a list of queries against the index and shows the top result for each query. Have to say this is pretty 🔥🔥🔥" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 600 }, "id": "WHTq86MG9UBF", "outputId": "b679e3d3-a82a-43bb-c59e-c7e9a3ce0577" }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", "
\n", " " ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", "
\n", "
\n", "
\n", "

Walking into the office

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

Saturday cleaning the yard

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

Working on the latest analysis

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

Working on my homework

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

Watching an exciting race

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

The universe is massive

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

Time lapse video of traffic

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

Relaxing Thanksgiving day

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", "
" ] }, "metadata": {} } ], "source": [ "import ipyplot\n", "from PIL import Image\n", "\n", "def resize(images):\n", " results = []\n", " for image in images:\n", " results.append(image.resize((350, int(image.height * (350 / image.width))), Image.Resampling.LANCZOS))\n", "\n", " return results\n", "\n", "images, labels = [], []\n", "for query in [\"Walking into the office\", \"Saturday cleaning the yard\", \"Working on the latest analysis\", \"Working on my homework\", \"Watching an exciting race\",\n", " \"The universe is massive\", \"Time lapse video of traffic\", \"Relaxing Thanksgiving day\"]:\n", " result = embeddings.search(f\"select object from txtai where similar(\\\"{query}\\\")\", 1)[0]\n", " images.append(result[\"object\"])\n", " labels.append(query)\n", "\n", "ipyplot.plot_images(resize(images), labels, img_width=350, force_b64=True)" ] }, { "cell_type": "markdown", "metadata": { "id": "8BYDpAeoiOvt" }, "source": [ "# Search with SQL\n", "\n", "txtai has support for SQL bind parameters, which enables similarity search with binary content." ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 317 }, "id": "a2V0wE84iWkh", "outputId": "f98abb71-1679-40fb-d3b4-98726b10dfe6" }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", "
\n", " " ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", "
\n", "
\n", "
\n", "

Result

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", "
" ] }, "metadata": {} } ], "source": [ "result = embeddings.search(f\"select object from txtai where similar(:x)\", 1, parameters={\"x\": Image.open(\"txtai/books.jpg\")})[0]\n", "\n", "ipyplot.plot_images(resize([result[\"object\"]]), [\"Result\"], img_width=350, force_b64=True)" ] }, { "cell_type": "markdown", "metadata": { "id": "0d1WVJyQkZ8A" }, "source": [ "# Multilingual Support\n", "\n", "sentence-transformers also has a [model](https://huggingface.co/sentence-transformers/clip-ViT-B-32-multilingual-v1) that supports over 50+ languages. This enables running queries using those languages with an image index.\n", "\n", "Note this model only supports text, so images must first be indexed with the model used above." ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 630 }, "id": "e8BxURU6gZV3", "outputId": "e58335f5-25ee-4c9a-bc90-b0027f21cdef" }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", "
\n", " " ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", "
\n", "
\n", "
\n", "

Zu Fuß ins Büro
(Walking into the office)

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

Samstag Reinigung des Hofes
(Saturday cleaning the yard)

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

Arbeiten an der neuesten Analyse
(Working on the latest analysis)

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

Arbeiten an meinen Hausaufgaben
(Working on my homework)

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

Ein spannendes Rennen beobachten
(Watching an exciting race)

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

Das Universum ist riesig
(The universe is massive)

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

Zeitraffer Video des Verkehrs
(Time lapse video of traffic)

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

Entspannender Thanksgiving-Tag
(Relaxing Thanksgiving day)

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", "
" ] }, "metadata": {} } ], "source": [ "import ipyplot\n", "\n", "from txtai.pipeline import Translation\n", "\n", "# Update model at query time to support multilingual queries\n", "embeddings.config[\"path\"] = \"sentence-transformers/clip-ViT-B-32-multilingual-v1\"\n", "embeddings.model = embeddings.loadvectors()\n", "\n", "# Translate queries to German\n", "queries = [\"Walking into the office\", \"Saturday cleaning the yard\", \"Working on the latest analysis\", \"Working on my homework\", \"Watching an exciting race\",\n", " \"The universe is massive\", \"Time lapse video of traffic\", \"Relaxing Thanksgiving day\"]\n", "translate = Translation()\n", "translated = translate(queries, \"de\")\n", "\n", "images, labels = [], []\n", "for x, query in enumerate(translated):\n", " result = embeddings.search(f\"select object from txtai where similar(:x)\", 1, parameters={\"x\": query})[0]\n", "\n", " images.append(result[\"object\"])\n", " labels.append(\"%s
(%s)\" % (query, queries[x]))\n", "\n", "ipyplot.plot_images(resize(images), labels, img_width=350, force_b64=True)" ] } ], "metadata": { "accelerator": "GPU", "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: examples/14_Run_pipeline_workflows.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Run pipeline workflows\n", "\n", "txtai has a growing list of models available through its pipeline framework. Pipelines wrap a machine learning model and transform data. Currently, pipelines can wrap Hugging Face models, Hugging Face pipelines or PyTorch models (support for TensorFlow is in the backlog).\n", "\n", "The following is a list of the currently implemented pipelines.\n", "\n", "* **Questions** - Answer questions using a text context\n", "* **Labels** - Apply labels to text using a zero-shot classification model. Also supports similarity comparisons.\n", "* **Summary** - Abstractive text summarization\n", "* **Textractor** - Extract text from documents\n", "* **Transcription** - Transcribe audio to text\n", "* **Translation** - Machine translation\n", "\n", "Pipelines are great and make using a variety of machine learning models easier. But what if we want to glue the results of different pipelines together? For example, extract text, summarize it, translate it to English and load it into an Embedding index. That would require code to join those operations together in an efficient manner.\n", "\n", "Enter workflows. Workflows are a simple yet powerful construct that takes a callable and returns elements. 
Workflows don't know they are working with pipelines but enable efficient processing of pipeline data. Workflows are streaming by nature and work on data in batches, allowing large volumes of data to be processed efficiently." ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. Since this notebook is using optional pipelines/workflows, we need to install the pipeline and workflow extras package." ] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline,workflow] sacremoses\n", "\n", "# Get test data\n", "!wget -N https://github.com/neuml/txtai/releases/download/v2.0.0/tests.tar.gz\n", "!tar -xvzf tests.tar.gz" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "I1dNQE7WT4kE" }, "source": [ "# Create a series of pipelines to use in this notebook" ] }, { "cell_type": "code", "metadata": { "id": "w4YqwBJaT4QD" }, "source": [ "%%capture\n", "from txtai.pipeline import Summary, Textractor, Transcription, Translation\n", "\n", "# Summary instance\n", "summary = Summary()\n", "\n", "# Text extraction\n", "textractor = Textractor()\n", "\n", "# Transcription instance\n", "transcribe = Transcription(\"facebook/wav2vec2-large-960h\")\n", "\n", "# Create a translation instance\n", "translate = Translation()" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "PNPJ95cdTKSS" }, "source": [ "# Basic workflow\n", "\n", "The following shows a basic workflow in action!" 
] }, { "cell_type": "code", "metadata": { "id": "nTDwXOUeTH2-", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "906d4354-cf29-4593-a790-8c175d981dee" }, "source": [ "from txtai.workflow import Workflow, Task\n", "\n", "# Workflow that translates text to French\n", "workflow = Workflow([Task(lambda x: translate(x, \"fr\"))])\n", "\n", "# Data to run through the pipeline\n", "data = [\"The sky is blue\", \"Forest through the trees\"]\n", "\n", "# Workflows are generators for efficiency, read results to list for display\n", "list(workflow(data))" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['Le ciel est bleu', 'Forêt à travers les arbres']" ] }, "metadata": {}, "execution_count": 13 } ] }, { "cell_type": "markdown", "metadata": { "id": "wicr0CAYRWZ0" }, "source": [ "This isn't too different from previous pipeline examples. The only difference is data is fed through the workflow. In this example, the workflow calls the translation pipeline and translates text to French. Let's look at a more complex example." ] }, { "cell_type": "markdown", "metadata": { "id": "0EeD8m6FR5cH" }, "source": [ "# Multistep workflow\n", "\n", "The following workflow reads a series of audio files, transcribes them to text and translates the text to French. This is based on the classic txtai example from [Introducing txtai](https://colab.research.google.com/github/neuml/txtai/blob/master/examples/01_Introducing_txtai.ipynb).\n", "\n", "Workflow tasks take two main parameters: the action to execute, which is a callable, and a pattern to filter data with. Data that is accepted by the filter will be processed, otherwise it will be passed through to the next task." 
] }, { "cell_type": "code", "metadata": { "id": "OF2G5-OiSBzy", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "e5c74089-1916-4bbd-93d3-9e25b1fe4ee5" }, "source": [ "from txtai.workflow import FileTask\n", "\n", "tasks = [\n", " FileTask(transcribe, r\"\\.wav$\"),\n", " Task(lambda x: translate(x, \"fr\"))\n", "]\n", "\n", "# List of files to process\n", "data = [\n", " \"txtai/US_tops_5_million.wav\",\n", " \"txtai/Canadas_last_fully.wav\",\n", " \"txtai/Beijing_mobilises.wav\",\n", " \"txtai/The_National_Park.wav\",\n", " \"txtai/Maine_man_wins_1_mil.wav\",\n", " \"txtai/Make_huge_profits.wav\"\n", "]\n", "\n", "# Workflow that translate text to French\n", "workflow = Workflow(tasks)\n", "\n", "# Run workflow\n", "list(workflow(data))" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[\"Les cas de virus U sont en tête d'un million\",\n", " \"La dernière plate-forme de glace entièrement intacte du Canada s'est soudainement effondrée en formant un berge de glace de taille manhatten\",\n", " \"Bagage mobilise les embarcations d'invasion le long des côtes à mesure que les tensions tiwaniennes s'intensifient\",\n", " \"Le service des parcs nationaux met en garde contre le sacrifice d'amis plus lents dans une attaque nue\",\n", " \"L'homme principal gagne du billet de loterie\",\n", " \"Faire d'énormes profits sans travailler faire jusqu'à cent mille dollars par jour\"]" ] }, "metadata": {}, "execution_count": 14 } ] }, { "cell_type": "markdown", "metadata": { "id": "PN08rnrQU1hx" }, "source": [ "# Complex workflow\n", "\n", "Let's put this all together into a full-fledged workflow to build an embeddings index. This workflow will work with both documents and audio files. Documents will have text extracted and summarized. Audio files will be transcribed. Both results will be joined, translated into French and loaded into an Embeddings index." 
] }, { "cell_type": "code", "metadata": { "id": "coZJw_1yU1Sq", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "213b34d5-157f-4548-8788-ac29cb4039dd" }, "source": [ "from txtai.embeddings import Embeddings, Documents\n", "from txtai.workflow import FileTask, WorkflowTask\n", "\n", "# Embeddings index\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/paraphrase-multilingual-mpnet-base-v2\", \"content\": True})\n", "documents = Documents()\n", "\n", "# List of files to process\n", "files = [\n", " \"txtai/article.pdf\",\n", " \"txtai/US_tops_5_million.wav\",\n", " \"txtai/Canadas_last_fully.wav\",\n", " \"txtai/Beijing_mobilises.wav\",\n", " \"txtai/The_National_Park.wav\",\n", " \"txtai/Maine_man_wins_1_mil.wav\",\n", " \"txtai/Make_huge_profits.wav\"\n", "]\n", "\n", "data = [(x, element, None) for x, element in enumerate(files)]\n", "\n", "# Workflow that extracts text and builds a summary\n", "articles = Workflow([\n", " FileTask(textractor),\n", " Task(summary)\n", "])\n", "\n", "# Define workflow tasks. Workflows can also be tasks!\n", "tasks = [\n", " WorkflowTask(articles, r\".\\.pdf$\"),\n", " FileTask(transcribe, r\"\\.wav$\"),\n", " Task(lambda x: translate(x, \"fr\")),\n", " Task(documents.add, unpack=False)\n", "]\n", "\n", "# Workflow that translate text to French\n", "workflow = Workflow(tasks)\n", "\n", "# Run workflow and show results to be indexed\n", "for x in workflow(data):\n", " print(x)\n", "\n", "# Build the embeddings index\n", "embeddings.index(documents)\n", "\n", "# Cleanup temporary storage\n", "documents.close()" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "(0, \"Txtai, un moteur de recherche alimenté par l'IA construit sur Transformers, permet la recherche basée sur la compréhension du langage naturel (NLU) dans n'importe quelle application. 
Le champ de traitement du langage naturel (NLP) évolue rapidement avec un certain nombre de nouveaux développements. Le moteur de recherche open-source est open source et disponible sur GitHub.\", None)\n", "(1, \"Les cas de virus U sont en tête d'un million\", None)\n", "(2, \"La dernière plate-forme de glace entièrement intacte du Canada s'est soudainement effondrée en formant un berge de glace de taille manhatten\", None)\n", "(3, \"Bagage mobilise les embarcations d'invasion le long des côtes à mesure que les tensions tiwaniennes s'intensifient\", None)\n", "(4, \"Le service des parcs nationaux met en garde contre le sacrifice d'amis plus lents dans une attaque nue\", None)\n", "(5, \"L'homme principal gagne du billet de loterie\", None)\n", "(6, \"Faire d'énormes profits sans travailler faire jusqu'à cent mille dollars par jour\", None)\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "n6i-xhJya8o4" }, "source": [ "# Query for results in French" ] }, { "cell_type": "code", "metadata": { "id": "cHbjivUOaUGu", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "0da8d8cb-dac6-4cad-ef00-a096b44533cf" }, "source": [ "# Run a search query and show the result.\n", "embeddings.search(\"changement climatique\", 1)[0]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'id': '2',\n", " 'score': 0.2982647716999054,\n", " 'text': \"La dernière plate-forme de glace entièrement intacte du Canada s'est soudainement effondrée en formant un berge de glace de taille manhatten\"}" ] }, "metadata": {}, "execution_count": 16 } ] }, { "cell_type": "code", "metadata": { "id": "aNerHvNpaxD4", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "f3792220-4518-4388-c7e7-c38f38f19b20" }, "source": [ "# Run a search query and show the result.\n", "embeddings.search(\"traitement du langage naturel\", 1)[0]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { 
"text/plain": [ "{'id': '0',\n", " 'score': 0.47031939029693604,\n", " 'text': \"Txtai, un moteur de recherche alimenté par l'IA construit sur Transformers, permet la recherche basée sur la compréhension du langage naturel (NLU) dans n'importe quelle application. Le champ de traitement du langage naturel (NLP) évolue rapidement avec un certain nombre de nouveaux développements. Le moteur de recherche open-source est open source et disponible sur GitHub.\"}" ] }, "metadata": {}, "execution_count": 17 } ] }, { "cell_type": "markdown", "source": [ "# Configuration-driven workflow\n", "\n", "Workflows can also be defined with YAML and run as an application. Applications can run standalone or as a FastAPI instance. More information can be [found here](https://neuml.github.io/txtai/api/). " ], "metadata": { "id": "Sz_f9qoOMC_m" } }, { "cell_type": "code", "source": [ "workflow = \"\"\"\n", "writable: true\n", "embeddings:\n", " path: sentence-transformers/paraphrase-multilingual-mpnet-base-v2\n", " content: True\n", "\n", "# Summarize text\n", "summary:\n", "\n", "# Extract text from documents\n", "textractor:\n", "\n", "# Transcribe audio to text\n", "transcription:\n", " path: facebook/wav2vec2-large-960h\n", "\n", "# Translate text between languages\n", "translation:\n", "\n", "workflow:\n", " summarize:\n", " tasks:\n", " - action: textractor\n", " task: file\n", " - summary\n", " index:\n", " tasks:\n", " - action: summarize\n", " select: '\\\\.pdf$'\n", " - action: transcription\n", " select: '\\\\.wav$'\n", " task: file\n", " - action: translation\n", " args: ['fr']\n", " - action: index\n", "\"\"\"\n", "\n", "# Create and run the workflow\n", "from txtai.app import Application\n", "\n", "# Create and run the workflow\n", "app = Application(workflow)\n", "list(app.workflow(\"index\", files))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "HoVlk_vNJKHY", "outputId": "34b68bcb-a6d5-4029-9bf2-f33e4381d1bc" }, "execution_count": null, 
"outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[\"Txtai, un moteur de recherche alimenté par l'IA construit sur Transformers, permet la recherche basée sur la compréhension du langage naturel (NLU) dans n'importe quelle application. Le champ de traitement du langage naturel (NLP) évolue rapidement avec un certain nombre de nouveaux développements. Le moteur de recherche open-source est open source et disponible sur GitHub.\",\n", " \"Les cas de virus U sont en tête d'un million\",\n", " \"La dernière plate-forme de glace entièrement intacte du Canada s'est soudainement effondrée en formant un berge de glace de taille manhatten\",\n", " \"Bagage mobilise les embarcations d'invasion le long des côtes à mesure que les tensions tiwaniennes s'intensifient\",\n", " \"Le service des parcs nationaux met en garde contre le sacrifice d'amis plus lents dans une attaque nue\",\n", " \"L'homme principal gagne du billet de loterie\",\n", " \"Faire d'énormes profits sans travailler faire jusqu'à cent mille dollars par jour\"]" ] }, "metadata": {}, "execution_count": 18 } ] }, { "cell_type": "code", "source": [ "# Run a search query and show the result.\n", "app.search(\"changement climatique\", 1)[0]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "a_klVZAXHJcw", "outputId": "33229268-0f98-4ca1-af7d-212bcbde6482" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'id': '2',\n", " 'score': 0.2982647716999054,\n", " 'text': \"La dernière plate-forme de glace entièrement intacte du Canada s'est soudainement effondrée en formant un berge de glace de taille manhatten\"}" ] }, "metadata": {}, "execution_count": 19 } ] }, { "cell_type": "code", "source": [ "# Run a search query and show the result.\n", "app.search(\"traitement du langage naturel\", 1)[0]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "I5xin0VNHJOu", "outputId": 
"2fbb9a93-b860-437d-c361-ee21eed75b6b" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'id': '0',\n", " 'score': 0.47031939029693604,\n", " 'text': \"Txtai, un moteur de recherche alimenté par l'IA construit sur Transformers, permet la recherche basée sur la compréhension du langage naturel (NLU) dans n'importe quelle application. Le champ de traitement du langage naturel (NLP) évolue rapidement avec un certain nombre de nouveaux développements. Le moteur de recherche open-source est open source et disponible sur GitHub.\"}" ] }, "metadata": {}, "execution_count": 20 } ] }, { "cell_type": "markdown", "metadata": { "id": "7zG4AimucFJs" }, "source": [ "# Wrapping up\n", "\n", "Results are good! We can see the power of workflows and how they can join a series of pipelines together in an efficient manner. Workflows can work with any callable, not just pipelines, workflows transform data from one format to another. Workflows are an exciting and promising development for txtai." ] } ] } ================================================ FILE: examples/15_Distributed_embeddings_cluster.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Distributed embeddings cluster\n", "\n", "The txtai API is a web-based service backed by [FastAPI](https://fastapi.tiangolo.com/). All txtai functionality is available via the API. The API can also cluster multiple embeddings indices into a single logical index to horizontally scale over multiple nodes. \n", "\n", "This notebook installs the txtai API and shows an example of building an embeddings cluster." 
] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. Since this notebook uses the API, we need to install the api extras package." ] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[api]" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "PNPJ95cdTKSS" }, "source": [ "# Start distributed embeddings cluster\n", "\n", "First we'll start multiple API instances that will serve as embeddings index shards. Each shard stores a subset of the indexed data and these shards work in tandem to form a single logical index.\n", "\n", "Then we'll start the main API instance that clusters the shards together into a logical instance.\n", "\n", "The API instances are all started in the background.\n" ] }, { "cell_type": "code", "metadata": { "id": "USb4JXZHxqTA" }, "source": [ "import os\n", "os.chdir(\"/content\")" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "nTDwXOUeTH2-", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "dee26849-39ae-4390-8bba-76bf9025fa61" }, "source": [ "%%writefile index.yml\n", "writable: true\n", "\n", "# Embeddings settings\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", " content: true" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing index.yml\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "iCdBh-JgfyBl", "outputId": "0066e314-7461-47c7-ca3b-15204911783e" }, "source": [ "%%writefile cluster.yml\n", "# Embeddings cluster\n", "cluster:\n", " shards:\n", " - http://127.0.0.1:8001\n", " - http://127.0.0.1:8002" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing 
cluster.yml\n" ] } ] }, { "cell_type": "code", "metadata": { "id": "nGITHxUyRzyp" }, "source": [ "# Start embeddings shards\n", "!CONFIG=index.yml nohup uvicorn --port 8001 \"txtai.api:app\" &> shard-1.log &\n", "!CONFIG=index.yml nohup uvicorn --port 8002 \"txtai.api:app\" &> shard-2.log &\n", "\n", "# Start main instance\n", "!CONFIG=cluster.yml nohup uvicorn --port 8000 \"txtai.api:app\" &> main.log &\n", "\n", "# Wait for startup\n", "!sleep 90" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "lxkbVng3giWP" }, "source": [ "# Python\n", "\n", "Let's first try the cluster out directly in Python. The code below aggregates the two shards into a single cluster and executes actions against the cluster." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "36HGAokoglfg", "outputId": "368ae013-2afc-4a1b-d7df-c429183637d7" }, "source": [ "%%writefile run.py\n", "from txtai.api import Cluster\n", "\n", "cluster = Cluster({\"shards\": [\"http://127.0.0.1:8001\", \"http://127.0.0.1:8002\"]})\n", "\n", "data = [\n", " \"US tops 5 million confirmed virus cases\",\n", " \"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\",\n", " \"Beijing mobilises invasion craft along coast as Taiwan tensions escalate\",\n", " \"The National Park Service warns against sacrificing slower friends in a bear attack\",\n", " \"Maine man wins $1M from $25 lottery ticket\",\n", " \"Make huge profits without work, earn up to $100,000 a day\",\n", "]\n", "\n", "# Index data\n", "cluster.add([{\"id\": x, \"text\": row} for x, row in enumerate(data)])\n", "cluster.index()\n", "\n", "# Test search\n", "result = cluster.search(\"feel good story\", 1)[0]\n", "print(\"Query: feel good story\\nResult:\", result[\"text\"])" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing run.py\n" ] } ] }, { "cell_type": "code", 
"metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6dQOzcfEs2Pk", "outputId": "a667594a-b778-4e4e-a75c-72e7982b7fbe" }, "source": [ "!python run.py" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Query: feel good story\n", "Result: Maine man wins $1M from $25 lottery ticket\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "NHvBFZeSd9AG" }, "source": [ "# JavaScript\n", "\n", "Next let's try to run the same code above via the API using JavaScript.\n", "\n", "```bash\n", "npm install txtai\n", "```\n", "\n", "For this example, we'll clone the txtai.js project to import the example build configuration." ] }, { "cell_type": "code", "metadata": { "id": "b52knObEdcCr" }, "source": [ "%%capture\n", "!git clone https://github.com/neuml/txtai.js" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "rUGS0t-JMsS9" }, "source": [ "## Run cluster.js\n", "\n", "The following script is a JavaScript version of the logic above" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "bPQ40_xRyFmA", "outputId": "b86a12c4-f2c7-427b-bd28-edba354c6713" }, "source": [ "%%writefile txtai.js/examples/node/src/cluster.js\n", "import {Embeddings} from \"txtai\";\n", "import {sprintf} from \"sprintf-js\";\n", "\n", "const run = async () => {\n", " try {\n", " let embeddings = new Embeddings(process.argv[2]);\n", "\n", " let data = [\"US tops 5 million confirmed virus cases\",\n", " \"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\",\n", " \"Beijing mobilises invasion craft along coast as Taiwan tensions escalate\",\n", " \"The National Park Service warns against sacrificing slower friends in a bear attack\",\n", " \"Maine man wins $1M from $25 lottery ticket\",\n", " \"Make huge profits without work, earn up to $100,000 a day\"];\n", "\n", " console.log();\n", " 
console.log(\"Querying an Embeddings cluster\");\n", " console.log(sprintf(\"%-20s %s\", \"Query\", \"Best Match\"));\n", " console.log(\"-\".repeat(50));\n", "\n", " for (let query of [\"feel good story\", \"climate change\", \"public health story\", \"war\", \"wildlife\", \"asia\", \"lucky\", \"dishonest junk\"]) {\n", " let results = await embeddings.search(query, 1);\n", " if (results && results.length > 0) {\n", " let result = results[0].text;\n", " console.log(sprintf(\"%-20s %s\", query, result));\n", " }\n", " }\n", " }\n", " catch (e) {\n", " console.trace(e);\n", " }\n", "};\n", "\n", "run();" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing txtai.js/examples/node/src/cluster.js\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "nTBs11j-GtD-" }, "source": [ "## Build and run cluster.js\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "metadata": { "id": "kC5Oub6wa1nK" }, "source": [ "%%capture\n", "os.chdir(\"txtai.js/examples/node\")\n", "!npm install\n", "!npm run build" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "Xr5IlvqH8W77" }, "source": [ "Next let's run the code against the main cluster URL" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ckOHNqyaeL-B", "outputId": "9c243fac-2316-4b8e-b044-6de529a8f3e8" }, "source": [ "!node dist/cluster.js http://127.0.0.1:8000" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", "Querying an Embeddings cluster\n", "Query Best Match\n", "--------------------------------------------------\n", "feel good story Maine man wins $1M from $25 lottery ticket\n", "climate change Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\n", "public health story US tops 5 million confirmed virus cases\n", "war Beijing mobilises invasion craft along coast as Taiwan tensions 
escalate\n", "wildlife The National Park Service warns against sacrificing slower friends in a bear attack\n", "asia Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n", "lucky Maine man wins $1M from $25 lottery ticket\n", "dishonest junk Make huge profits without work, earn up to $100,000 a day\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "1yukBIMYG5OE" }, "source": [ "The JavaScript program is showing the same results as the Python code above. This is running a clustered query against both nodes in the cluster and aggregating the results together.\n", "\n", "Queries can be run against each individual shard to see what the queries independently return." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "73rZCo4O4IQR", "outputId": "9f2cb119-7a21-41d9-fdbf-4410af246934" }, "source": [ "!node dist/cluster.js http://127.0.0.1:8001" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", "Querying an Embeddings cluster\n", "Query Best Match\n", "--------------------------------------------------\n", "feel good story Maine man wins $1M from $25 lottery ticket\n", "climate change Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n", "public health story US tops 5 million confirmed virus cases\n", "war Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n", "wildlife Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n", "asia Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n", "lucky Maine man wins $1M from $25 lottery ticket\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ZeVBLJyr4Knr", "outputId": "b75691a4-25bf-43dc-8878-f9792a4430b8" }, "source": [ "!node dist/cluster.js http://127.0.0.1:8002" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": 
[ "\n", "Querying an Embeddings cluster\n", "Query Best Match\n", "--------------------------------------------------\n", "feel good story Make huge profits without work, earn up to $100,000 a day\n", "climate change Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\n", "public health story The National Park Service warns against sacrificing slower friends in a bear attack\n", "war The National Park Service warns against sacrificing slower friends in a bear attack\n", "wildlife The National Park Service warns against sacrificing slower friends in a bear attack\n", "asia The National Park Service warns against sacrificing slower friends in a bear attack\n", "lucky The National Park Service warns against sacrificing slower friends in a bear attack\n", "dishonest junk Make huge profits without work, earn up to $100,000 a day\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "J2I_4hmZ8uXs" }, "source": [ "Note the differences. The section below runs a count against the full cluster and each shard to show the count of records in each." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "BKm27yna4MWr", "outputId": "bfc60af7-1b2b-451f-b10e-e2f8cf6f14fa" }, "source": [ "!curl http://127.0.0.1:8000/count\n", "!printf \"\\n\"\n", "!curl http://127.0.0.1:8001/count\n", "!printf \"\\n\"\n", "!curl http://127.0.0.1:8002/count" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "6\n", "3\n", "3" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "6rKj-I0djRQj" }, "source": [ "This notebook showed how a distributed embeddings cluster can be created with txtai. This example can be further scaled out on Kubernetes with StatefulSets, which will be covered in a future tutorial." 
] } ] } ================================================ FILE: examples/16_Train_a_text_labeler.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Train a text labeler\n", "\n", "The [Hugging Face Model Hub](https://huggingface.co/models) has a wide range of models that can handle many tasks. While these models perform well, the best performance often is found when fine-tuning a model with task-specific data. \n", "\n", "Hugging Face provides a [number of full-featured examples](https://github.com/huggingface/transformers/tree/master/examples) available to assist with training task-specific models. When building models from the command line, these scripts are a great way to get started.\n", "\n", "txtai provides a training pipeline that can be used to train new models programmatically using the Transformers Trainer framework. The training pipeline supports the following:\n", "\n", "- Building transient models without requiring an output directory\n", "- Loading training data from Hugging Face datasets, pandas DataFrames and lists of dicts\n", "- Text sequence classification tasks (single/multi label classification and regression) including all GLUE tasks\n", "- All training arguments\n", "\n", "This notebook shows examples of how to use txtai to train/fine-tune new models." ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." 
] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline-train] datasets pandas" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "PNPJ95cdTKSS" }, "source": [ "# Train a model\n", "\n", "Let's get right to it! The following example fine-tunes a tiny Bert model with the sst2 dataset.\n", "\n", "The trainer pipeline is basically a one-liner that fine-tunes any text classification/regression model available (locally and/or from the HF Hub). \n" ] }, { "cell_type": "code", "metadata": { "id": "USb4JXZHxqTA" }, "source": [ "from datasets import load_dataset\n", "\n", "from txtai.pipeline import HFTrainer\n", "\n", "trainer = HFTrainer()\n", "\n", "# Hugging Face dataset\n", "ds = load_dataset(\"glue\", \"sst2\")\n", "model, tokenizer = trainer(\"google/bert_uncased_L-2_H-128_A-2\", ds[\"train\"], columns=(\"sentence\", \"label\"))" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "CubsNAbpEWQg" }, "source": [ "The default trainer pipeline functionality will not store any logs, checkpoints or models to disk. The trainer can take any of the standard TrainingArguments to enable persistent models.\n", "\n", "The next section creates a Labels pipeline using the newly built model and runs the model against the sst2 validation set. 
" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "xw2y2C5Mg11_", "outputId": "78400e45-ea5c-4cd9-d205-b55ee7a9f005" }, "source": [ "from txtai.pipeline import Labels\n", "\n", "labels = Labels((model, tokenizer), dynamic=False)\n", "\n", "# Determine accuracy on validation set\n", "results = [row[\"label\"] == labels(row[\"sentence\"])[0][0] for row in ds[\"validation\"]]\n", "sum(results) / len(ds[\"validation\"])" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.8268348623853211" ] }, "metadata": {}, "execution_count": 10 } ] }, { "cell_type": "markdown", "metadata": { "id": "ZAHSwaB3Ex49" }, "source": [ "82.68% accuracy - not bad for a tiny Bert model. \n", "\n" ] }, { "cell_type": "markdown", "metadata": { "id": "f3GkY4JNEhhE" }, "source": [ "# Train a model with Lists\n", "\n", "As mentioned earlier, the trainer pipeline supports Hugging Face datasets, pandas DataFrames and lists of dicts. The example below trains a model using lists." 
] }, { "cell_type": "code", "metadata": { "id": "QkApw1b2hfZq", "colab": { "base_uri": "https://localhost:8080/", "height": 182 }, "outputId": "8c3dceae-49fb-4b63-837d-5944e63c768e" }, "source": [ "data = [{\"text\": \"This is a test sentence\", \"label\": 0}, {\"text\": \"This is not a test\", \"label\": 1}]\n", "\n", "model, tokenizer = trainer(\"google/bert_uncased_L-2_H-128_A-2\", data)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Some weights of the model checkpoint at google/bert_uncased_L-2_H-128_A-2 were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']\n", "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google/bert_uncased_L-2_H-128_A-2 and are newly initialized: ['classifier.weight', 'classifier.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "output_type": "display_data", "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [3/3 00:00, Epoch 3/3]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss

" ], "text/plain": [ "" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "metadata": { "id": "cjYTxm7sFKyZ" }, "source": [ "# Train a model with DataFrames\n", "\n", "The next section builds a new model using data stored in a pandas DataFrame." ] }, { "cell_type": "code", "metadata": { "id": "0XaKKQ32wqbs", "colab": { "base_uri": "https://localhost:8080/", "height": 182 }, "outputId": "edb82a45-6c2a-4718-ce0b-56030f95ffbf" }, "source": [ "import pandas as pd\n", "\n", "df = pd.DataFrame(data)\n", "\n", "model, tokenizer = trainer(\"google/bert_uncased_L-2_H-128_A-2\", data)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Some weights of the model checkpoint at google/bert_uncased_L-2_H-128_A-2 were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']\n", "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google/bert_uncased_L-2_H-128_A-2 and are newly initialized: ['classifier.weight', 'classifier.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "output_type": "display_data", "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [3/3 00:00, Epoch 3/3]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss

" ], "text/plain": [ "" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "metadata": { "id": "QH3D8PQSFvQO" }, "source": [ "# Train a regression model\n", "\n", "The previous models were classification tasks. The following model trains a sentence similarity model with a regression output per sentence pair between 0 (dissimilar) and 1 (similar)." ] }, { "cell_type": "code", "metadata": { "id": "1rXuz4ncw9G-" }, "source": [ "ds = load_dataset(\"glue\", \"stsb\")\n", "model, tokenizer = trainer(\"google/bert_uncased_L-2_H-128_A-2\", ds[\"train\"], columns=(\"sentence1\", \"sentence2\", \"label\"))" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fyvAslSP6j0F", "outputId": "ec46a6aa-25a7-4777-e226-d53aeb37899b" }, "source": [ "labels = Labels((model, tokenizer), dynamic=False)\n", "labels([[(\"Sailing to the arctic\", \"Dogs and cats don't get along\")], \n", " [(\"Walking down the road\", \"Walking down the street\")]])" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[[(0, 0.5648878216743469)], [(0, 0.97544926404953)]]" ] }, "metadata": {}, "execution_count": 14 } ] } ] } ================================================ FILE: examples/17_Train_without_labels.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Train without labels\n", "\n", "Almost all data available is unlabeled. Labeled data takes effort to manually review and/or takes time to collect. Zero-shot classification takes existing large language models and runs a similarity comparison between candidate text and a list of labels. 
This has been shown to perform surprisingly well.\n", "\n", "The problem with zero-shot classifiers is that they need to have a large number of parameters (400M+) to perform well against general tasks, which comes with sizable hardware requirements.\n", "\n", "This notebook explores using zero-shot classifiers to build training data for smaller models. A simple form of [knowledge distillation](https://en.wikipedia.org/wiki/Knowledge_distillation). " ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline-train] datasets pandas" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "3PUe1OW8IZR5" }, "source": [ "# Apply zero-shot classifier to unlabeled text\n", "\n", "The following section takes a small 1000 record random sample of the sst2 dataset and applies a zero-shot classifier to the text. The labels are ignored. This dataset was chosen only to be able to evaluate the accuracy at the end. 
" ] }, { "cell_type": "code", "metadata": { "id": "GlrOnS4cmkih" }, "source": [ "import random\n", "\n", "from datasets import load_dataset\n", "\n", "from txtai.pipeline import Labels\n", "\n", "def batch(texts, size):\n", " return [texts[x : x + size] for x in range(0, len(texts), size)]\n", "\n", "# Set random seed for repeatable sampling\n", "random.seed(42)\n", "\n", "ds = load_dataset(\"glue\", \"sst2\")\n", "\n", "sentences = random.sample(ds[\"train\"][\"sentence\"], 1000)\n", "\n", "# Load a zero shot classifier - txtai provides this through the Labels pipeline\n", "labels = Labels(\"microsoft/deberta-large-mnli\")\n", "\n", "train = []\n", "\n", "# Zero-shot prediction using [\"negative\", \"positive\"] labels\n", "for chunk in batch(sentences, 32):\n", " train.extend([{\"text\": chunk[x], \"label\": label[0][0]} for x, label in enumerate(labels(chunk, [\"negative\", \"positive\"]))])" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "TLsZmRpHJGav" }, "source": [ "Next, we'll use the training set we just built to train a smaller Electra model." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 214 }, "id": "nAt42TIHnfTN", "outputId": "7080b21d-ecf4-459a-c818-11c748e28bb7" }, "source": [ "from txtai.pipeline import HFTrainer\n", "\n", "trainer = HFTrainer()\n", "model, tokenizer = trainer(\"google/electra-base-discriminator\", train, num_train_epochs=5)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Some weights of the model checkpoint at google/electra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias']\n", "- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "output_type": "display_data", "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [625/625 02:51, Epoch 5/5]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
5000.282800

" ], "text/plain": [ "" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "metadata": { "id": "J9pugqJSJRn6" }, "source": [ "# Evaluating accuracy\n", "\n", "Recall the training set is only 1000 records. To be clear, training an Electra model against the full sst2 dataset would perform better than below. But for this exercise, we are not using the training labels and are simulating labeled data not being available.\n", "\n", "First, let's see what the baseline accuracy for the zero-shot model would be against the sst2 evaluation set. Reminder that this has not seen any of the sst2 training data. \n" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "RbgIrkgMvJS4", "outputId": "69287790-e01c-4c17-dfd5-0dc6afd73c98" }, "source": [ "labels = Labels(\"microsoft/deberta-large-mnli\")" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']\n", "- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-36UBMILpKYh", "outputId": "3a340b9f-57c5-4c4c-d975-0fcc47df4930" }, "source": [ "results = [row[\"label\"] == labels(row[\"sentence\"], [\"negative\", \"positive\"])[0][0] for row in ds[\"validation\"]]\n", "sum(results) / len(ds[\"validation\"])" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.8818807339449541" ] }, "metadata": {}, "execution_count": 21 } ] }, { "cell_type": "markdown", "metadata": { "id": "uJVnWHZZKFIN" }, "source": [ "88.19% accuracy, not bad for a model that has not been trained on the dataset at all! Shows the power of zero-shot classification.\n", "\n", "Next, let's test our model trained on the 1000 zero-shot labeled records." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Kr5IZqZtvXlP", "outputId": "1faeb0d6-349b-4982-e9e8-cdbbde9e9a09" }, "source": [ "labels = Labels((model, tokenizer), dynamic=False)\n", "\n", "results = [row[\"label\"] == labels(row[\"sentence\"])[0][0] for row in ds[\"validation\"]]\n", "sum(results) / len(ds[\"validation\"])" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.8738532110091743" ] }, "metadata": {}, "execution_count": 22 } ] }, { "cell_type": "markdown", "metadata": { "id": "sDw-Zh43KVdX" }, "source": [ "87.39% accuracy! 
Wouldn't get too carried away with the percentages but this at least nearly meets the accuracy of the zero-shot classifier.\n", "\n", "Now this model will be highly tuned for a specific task but it had the opportunity to learn from the combined 1000 records whereas the zero-shot classifier views each record independently. It's also much more performant. " ] }, { "cell_type": "markdown", "metadata": { "id": "QEAwki2lLM2A" }, "source": [ "# Conclusion\n", "\n", "This notebook explored a method of building trained text classifiers without training data being available. Given the amount of resources needed to run large-scale zero-shot classifiers, this method is a simple way to build smaller models tuned for specific tasks. In this example, the zero-shot classifier has 400M parameters and the trained text classifier has 110M. " ] } ] } ================================================ FILE: examples/18_Export_and_run_models_with_ONNX.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Export and run models with ONNX\n", "\n", "The [ONNX runtime](https://onnx.ai/) provides a common serialization format for machine learning models. ONNX supports a number of [different platforms/languages](https://onnxruntime.ai/docs/how-to/install.html#requirements) and has features built in to help reduce inference time. \n", "\n", "PyTorch has robust support for exporting Torch models to ONNX. This enables exporting Hugging Face Transformer and/or other downstream models directly to ONNX. \n", "\n", "ONNX opens an avenue for direct inference using a number of languages and platforms. For example, a model could be run directly on Android to limit data sent to a third party service. 
ONNX is an exciting development with a lot of promise. Microsoft has also released [Hummingbird](https://github.com/microsoft/hummingbird) which enables exporting traditional models (sklearn, decision trees, logistic regression, ...) to ONNX. \n", "\n", "This notebook will cover how to export models to ONNX using txtai. These models will then be directly run in Python, JavaScript, Java and Rust. Currently, txtai supports all these languages through its API and that is still the recommended approach. " ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. Since this notebook uses ONNX quantization, we need to install the pipeline extras package." ] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install datasets git+https://github.com/neuml/txtai#egg=txtai[pipeline]" ], "execution_count": 25, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "PNPJ95cdTKSS" }, "source": [ "# Run a model with ONNX\n", "\n", "Let's get right to it! 
The following example exports a sentiment analysis model to ONNX and runs an inference session.\n", "\n" ] }, { "cell_type": "code", "metadata": { "id": "USb4JXZHxqTA", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "28d3e70e-efa9-4b07-a602-6ffd89d1279f" }, "source": [ "import numpy as np\n", "\n", "from onnxruntime import InferenceSession, SessionOptions\n", "from transformers import AutoTokenizer\n", "from txtai.pipeline import HFOnnx\n", "\n", "# Normalize logits using sigmoid function\n", "sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))\n", "\n", "# Export to ONNX\n", "onnx = HFOnnx()\n", "model = onnx(\"distilbert-base-uncased-finetuned-sst-2-english\", \"text-classification\")\n", "\n", "# Start inference session\n", "options = SessionOptions()\n", "session = InferenceSession(model, options)\n", "\n", "# Tokenize\n", "tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased-finetuned-sst-2-english\")\n", "tokens = tokenizer([\"I am happy\", \"I am mad\"], return_tensors=\"np\")\n", "\n", "# Print results\n", "outputs = session.run(None, dict(tokens))\n", "print(sigmoid(outputs[0]))" ], "execution_count": 26, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[[0.01295124 0.9909526 ]\n", " [0.9874723 0.0297817 ]]\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "jkmQoQvlmHfQ" }, "source": [ "And just like that, there are results! The text classification model is judging sentiment using two labels, 0 for negative to 1 for positive. The results above shows the probability of each label per text snippet.\n", "\n", "The ONNX pipeline loads the model, converts the graph to ONNX and returns. Note that no output file was provided, in this case the ONNX model is returned as a byte array. If an output file is provided, this method returns the output path." 
] }, { "cell_type": "markdown", "metadata": { "id": "yFAOHVmXml8o" }, "source": [ "# Train and Export a model for Text Classification\n", "\n", "Next we'll combine the ONNX pipeline with a Trainer pipeline to create a \"train and export to ONNX\" workflow." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 579 }, "id": "Wh8TkszumlIe", "outputId": "864f2074-ae50-40d6-bc34-2b1d86a71488" }, "source": [ "from datasets import load_dataset\n", "from txtai.pipeline import HFTrainer\n", "\n", "trainer = HFTrainer()\n", "\n", "# Hugging Face dataset\n", "ds = load_dataset(\"glue\", \"sst2\")\n", "data = ds[\"train\"].select(range(10000)).flatten_indices()\n", "\n", "# Train new model using 10,000 SST2 records (in-memory)\n", "model, tokenizer = trainer(\"google/electra-base-discriminator\", data, columns=(\"sentence\", \"label\"))\n", "\n", "# Export model trained in-memory to ONNX (still in-memory)\n", "output = onnx((model, tokenizer), \"text-classification\", quantize=True)\n", "\n", "# Start inference session\n", "options = SessionOptions()\n", "session = InferenceSession(output, options)\n", "\n", "# Tokenize\n", "tokens = tokenizer([\"I am happy\", \"I am mad\"], return_tensors=\"np\")\n", "\n", "# Print results\n", "outputs = session.run(None, dict(tokens))\n", "print(sigmoid(outputs[0]))" ], "execution_count": 27, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "WARNING:datasets.arrow_dataset:Loading cached processed dataset at /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-e28d0e20a676bad0.arrow\n", "WARNING:datasets.arrow_dataset:Loading cached processed dataset at /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-d7b5d80ca22204f9.arrow\n", "Some weights of the model checkpoint at google/electra-base-discriminator were not used when initializing 
ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias']\n", "- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:310: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", " FutureWarning,\n", "You're using a ElectraTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" ] }, { "output_type": "display_data", "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [3750/3750 07:56, Epoch 3/3]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
5000.396800
10000.330900
15000.232400
20000.188200
25000.173600
30000.068600
35000.069800

" ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "[[0.01525715 0.975399 ]\n", " [0.97395283 0.04432926]]\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "lE7dPj3tsn5S" }, "source": [ "The results are similar to the previous step, although this model is only trained on a fraction of the sst2 dataset. Lets save this model for later." ] }, { "cell_type": "code", "metadata": { "id": "Q_kAFYd_s_Bi" }, "source": [ "onnx = HFOnnx()\n", "text = onnx((model, tokenizer), \"text-classification\", \"text-classify.onnx\", quantize=True)" ], "execution_count": 29, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "ugNZO4c-uAS-" }, "source": [ "# Export a Sentence Embeddings model\n", "\n", "The ONNX pipeline also supports exporting sentence embeddings models trained with the [sentence-transformers](https://github.com/UKPLab/sentence-transformers) package. " ] }, { "cell_type": "code", "metadata": { "id": "x9B7qOk_uQRN" }, "source": [ "embeddings = onnx(\"sentence-transformers/paraphrase-MiniLM-L6-v2\", \"pooling\", \"embeddings.onnx\", quantize=True)" ], "execution_count": 30, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "rirMSM2kvgJF" }, "source": [ "Now let's run the model with ONNX." 
] }, { "cell_type": "code", "metadata": { "id": "6MBraENcu8Oz", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "ba4528d2-6d6a-4181-e9c4-3d2a98d6663a" }, "source": [ "from sklearn.metrics.pairwise import cosine_similarity\n", "\n", "options = SessionOptions()\n", "session = InferenceSession(embeddings, options)\n", "\n", "tokens = tokenizer([\"I am happy\", \"I am glad\"], return_tensors=\"np\")\n", "\n", "outputs = session.run(None, dict(tokens))[0]\n", "\n", "print(cosine_similarity(outputs))" ], "execution_count": 31, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[[0.99999994 0.8474637 ]\n", " [0.8474637 0.9999997 ]]\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "pwgU4vu8vk0T" }, "source": [ "The code above tokenizes two separate text snippets (\"I am happy\" and \"I am glad\") and runs it through the ONNX model. \n", "\n", "This outputs two embeddings arrays and those arrays are compared using cosine similarity. As we can see, the two text snippets have close semantic meaning." ] }, { "cell_type": "markdown", "metadata": { "id": "t_OQaQeIb7UB" }, "source": [ "# Load an ONNX model with txtai\n", "\n", "txtai has built-in support for ONNX models. Loading an ONNX model is seamless and Embeddings and Pipelines support it. The following section shows how to load a classification pipeline and embeddings model backed by ONNX." 
] }, { "cell_type": "code", "metadata": { "id": "vhsFzCRBby-h", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "745d69b0-035b-4e44-a881-57f9ece171ab" }, "source": [ "from txtai.embeddings import Embeddings\n", "from txtai.pipeline import Labels\n", "\n", "labels = Labels((\"text-classify.onnx\", \"google/electra-base-discriminator\"), dynamic=False)\n", "print(labels([\"I am happy\", \"I am mad\"]))\n", "\n", "embeddings = Embeddings({\"path\": \"embeddings.onnx\", \"tokenizer\": \"sentence-transformers/paraphrase-MiniLM-L6-v2\"})\n", "print(embeddings.similarity(\"I am happy\", [\"I am glad\"]))" ], "execution_count": 32, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[[(1, 0.999687910079956), (0, 0.0003121310146525502)], [(0, 0.9991233944892883), (1, 0.0008765518432483077)]]\n", "[(0, 0.8298245072364807)]\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "Xx8G29hkwdNY" }, "source": [ "# JavaScript\n", "\n", "So far, we've exported models to ONNX and run them through Python. This already has a lot of advantages, which include fast inference times, quantization and less software dependencies. But ONNX really shines when we run a model trained in Python in other languages/platforms.\n", "\n", "Let's try running the models trained above in JavaScript. 
The first step is getting the Node.js environment and dependencies set up.\n" ] }, { "cell_type": "code", "metadata": { "id": "_RK79O9c4Z_y" }, "source": [ "%%capture\n", "import os\n", "\n", "os.chdir(\"/content\")\n", "!mkdir js\n", "os.chdir(\"/content/js\")\n", "\n", "# Copy ONNX models\n", "!cp ../text-classify.onnx .\n", "!cp ../embeddings.onnx .\n", "\n", "# Get tokenizers project\n", "!git clone https://github.com/huggingface/tokenizers.git\n", "\n", "os.chdir(\"/content/js/tokenizers/bindings/node\")\n", "\n", "# Install Rust to compile tokenizer bindings\n", "!apt-get install rustc cargo\n", "\n", "# Build tokenizers package locally as binary version on npm doesn't work for latest version of Node.js\n", "!npm install --also=dev\n", "!npm run dev\n", "\n", "os.chdir(\"/content/js\")" ], "execution_count": 33, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "0HtVEl74xrZ7", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "7ffea868-6dd7-4603-e04c-ce8d4c557ff6" }, "source": [ "%%writefile package.json\n", "{\n", " \"name\": \"onnx-test\",\n", " \"private\": true,\n", " \"version\": \"1.0.0\",\n", " \"description\": \"ONNX Runtime Node.js test\",\n", " \"main\": \"index.js\",\n", " \"dependencies\": {\n", " \"onnxruntime-node\": \">=1.12.1\",\n", " \"tokenizers\": \"file:tokenizers/bindings/node\"\n", " }\n", "}" ], "execution_count": 34, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing package.json\n" ] } ] }, { "cell_type": "code", "source": [ "%%capture\n", "\n", "# Install all dependencies\n", "!npm install" ], "metadata": { "id": "4naPtk-iBI-g" }, "execution_count": 35, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "At85iA8U63iV" }, "source": [ "Next we'll write the inference code in JavaScript to an index.js file." 
] }, { "cell_type": "code", "metadata": { "id": "RImohEnFyFg0", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "094ef937-0650-4d5b-f4e3-79f114bd9807", "cellView": "form" }, "source": [ "#@title\n", "%%writefile index.js\n", "const ort = require('onnxruntime-node');\n", "const { promisify } = require('util');\n", "const { Tokenizer } = require(\"tokenizers/dist/bindings/tokenizer\");\n", "\n", "function sigmoid(data) {\n", " return data.map(x => 1 / (1 + Math.exp(-x)))\n", "}\n", "\n", "function softmax(data) { \n", " return data.map(x => Math.exp(x) / (data.map(y => Math.exp(y))).reduce((a,b) => a+b)) \n", "}\n", "\n", "function similarity(v1, v2) {\n", " let dot = 0.0;\n", " let norm1 = 0.0;\n", " let norm2 = 0.0;\n", "\n", " for (let x = 0; x < v1.length; x++) {\n", " dot += v1[x] * v2[x];\n", " norm1 += Math.pow(v1[x], 2);\n", " norm2 += Math.pow(v2[x], 2);\n", " }\n", "\n", " return dot / (Math.sqrt(norm1) * Math.sqrt(norm2));\n", "}\n", "\n", "function tokenizer() {\n", " let tokenizer = Tokenizer.fromPretrained(\"bert-base-uncased\");\n", " return promisify(tokenizer.encode.bind(tokenizer));\n", "}\n", "\n", "async function predict(session, text) {\n", " try {\n", " // Tokenize input\n", " let encode = tokenizer();\n", " let output = await encode(text);\n", "\n", " let ids = output.getIds().map(x => BigInt(x))\n", " let mask = output.getAttentionMask().map(x => BigInt(x))\n", " let tids = output.getTypeIds().map(x => BigInt(x))\n", "\n", " // Convert inputs to tensors \n", " let tensorIds = new ort.Tensor('int64', BigInt64Array.from(ids), [1, ids.length]);\n", " let tensorMask = new ort.Tensor('int64', BigInt64Array.from(mask), [1, mask.length]);\n", " let tensorTids = new ort.Tensor('int64', BigInt64Array.from(tids), [1, tids.length]);\n", "\n", " let inputs = null;\n", " if (session.inputNames.length > 2) {\n", " inputs = { input_ids: tensorIds, attention_mask: tensorMask, token_type_ids: tensorTids};\n", " }\n", " else {\n", " inputs = { 
input_ids: tensorIds, attention_mask: tensorMask};\n", " }\n", "\n", " return await session.run(inputs);\n", " } catch (e) {\n", " console.error(`failed to inference ONNX model: ${e}.`);\n", " }\n", "}\n", "\n", "async function main() {\n", " let args = process.argv.slice(2);\n", " if (args.length > 1) {\n", " // Run sentence embeddings\n", " const session = await ort.InferenceSession.create('./embeddings.onnx');\n", "\n", " let v1 = await predict(session, args[0]);\n", " let v2 = await predict(session, args[1]);\n", "\n", " // Unpack results\n", " v1 = v1.embeddings.data;\n", " v2 = v2.embeddings.data;\n", "\n", " // Print similarity\n", " console.log(similarity(Array.from(v1), Array.from(v2)));\n", " }\n", " else {\n", " // Run text classifier\n", " const session = await ort.InferenceSession.create('./text-classify.onnx');\n", " let results = await predict(session, args[0]);\n", "\n", " // Normalize results using softmax and print\n", " console.log(softmax(results.logits.data));\n", " }\n", "}\n", "\n", "main();" ], "execution_count": 36, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing index.js\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "rZI9PJzi6_bO" }, "source": [ "## Run Text Classification in JavaScript with ONNX" ] }, { "cell_type": "code", "metadata": { "id": "bdz68KZT1Jfm", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "48c4a427-3108-436c-eac9-564835ea061c" }, "source": [ "!node . \"I am happy\"\n", "!node . \"I am mad\"" ], "execution_count": 37, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Float32Array(2) [ \u001b[33m0.0003121308400295675\u001b[39m, \u001b[33m0.9996878504753113\u001b[39m ]\n", "Float32Array(2) [ \u001b[33m0.9991234540939331\u001b[39m, \u001b[33m0.0008765519596636295\u001b[39m ]\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "swSEmqto33VP" }, "source": [ "First off, have to say this is 🔥🔥🔥! 
Just amazing that this model can be fully run in JavaScript. It's a great time to be in NLP!\n", "\n", "The steps above installed a JavaScript environment with dependencies to run ONNX and tokenize data in JavaScript. The text classification model previously created is loaded into the JavaScript ONNX runtime and inference is run.\n", "\n", "As a reminder, the text classification model is judging sentiment using two labels, 0 for negative and 1 for positive. The results above show the probability of each label per text snippet." ] }, { "cell_type": "markdown", "metadata": { "id": "5Az9YaDc6u9P" }, "source": [ "## Build sentence embeddings and compare similarity in JavaScript with ONNX" ] }, { "cell_type": "code", "metadata": { "id": "10jcUbUx6MAI", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "ac751cee-5f44-4dad-c164-5d124be75ec3" }, "source": [ "!node . \"I am happy\", \"I am glad\"" ], "execution_count": 38, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[33m0.8285076844387538\u001b[39m\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "8Jyk-9Ko78Ma" }, "source": [ "Once again....wow!! The sentence embeddings model produces vectors that can be used to compare semantic similarity, -1 being most dissimilar and 1 being most similar.\n", "\n", "While the results don't match the exported model exactly, they're very close. Worth mentioning again that this is 100% JavaScript, no API or remote calls, all within node." ] }, { "cell_type": "markdown", "metadata": { "id": "BQeMBNWO9Hpr" }, "source": [ "# Java\n", "\n", "Let's try the same thing with Java. The following sections initialize a Java build environment and write out the code necessary to run the ONNX inference." 
] }, { "cell_type": "code", "source": [ "%%capture\n", "import os\n", "\n", "os.chdir(\"/content\")\n", "!mkdir java\n", "os.chdir(\"/content/java\")\n", "\n", "# Copy ONNX models\n", "!cp ../text-classify.onnx .\n", "!cp ../embeddings.onnx .\n", "\n", "# Save copy of Bert Tokenizer\n", "tokenizer.save_pretrained(\"bert\")\n", "\n", "!mkdir -p src/main/java\n", "\n", "# Install gradle\n", "!wget https://services.gradle.org/distributions/gradle-7.5.1-bin.zip\n", "!unzip -o gradle-7.5.1-bin.zip" ], "metadata": { "id": "L1YMoO7WwkEk" }, "execution_count": 39, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "gjZ2p7Jf9mOV", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "0cc3f4ef-bac6-4c13-dc6b-8482353bb741" }, "source": [ "%%writefile build.gradle\n", "apply plugin: \"java\"\n", "\n", "repositories {\n", " mavenCentral()\n", "}\n", "\n", "dependencies {\n", " implementation \"com.robrua.nlp:easy-bert:1.0.3\"\n", " implementation \"com.microsoft.onnxruntime:onnxruntime:1.12.1\"\n", "}\n", "\n", "java {\n", " toolchain {\n", " languageVersion = JavaLanguageVersion.of(8)\n", " }\n", "}\n", "\n", "jar {\n", " archiveBaseName = \"onnxjava\"\n", "}\n", "\n", "task onnx(type: JavaExec) {\n", " description = \"Runs ONNX demo\"\n", " classpath = sourceSets.main.runtimeClasspath\n", " main = \"OnnxDemo\"\n", "}" ], "execution_count": 40, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing build.gradle\n" ] } ] }, { "cell_type": "code", "metadata": { "id": "9wlVWVky9NZ3" }, "source": [ "%%capture\n", "\n", "# Create environment\n", "!gradle-7.5.1/bin/gradle wrapper" ], "execution_count": 41, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "vnxKGSuz_fnj", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "c1b73bf8-c5f4-46fa-df90-29bd60507287", "cellView": "form" }, "source": [ "#@title\n", "%%writefile src/main/java/OnnxDemo.java\n", "import java.io.File;\n", "\n", "import java.nio.LongBuffer;\n", 
"\n", "import java.util.Arrays;\n", "import java.util.ArrayList;\n", "import java.util.HashMap;\n", "import java.util.List;\n", "import java.util.Map;\n", "\n", "import ai.onnxruntime.OnnxTensor;\n", "import ai.onnxruntime.OrtEnvironment;\n", "import ai.onnxruntime.OrtSession;\n", "import ai.onnxruntime.OrtSession.Result;\n", "\n", "import com.robrua.nlp.bert.FullTokenizer;\n", "\n", "class Tokens {\n", " public long[] ids;\n", " public long[] mask;\n", " public long[] types;\n", "}\n", "\n", "class Tokenizer {\n", " private FullTokenizer tokenizer;\n", "\n", " public Tokenizer(String path) {\n", " File vocab = new File(path);\n", " this.tokenizer = new FullTokenizer(vocab, true);\n", " }\n", "\n", " public Tokens tokenize(String text) {\n", " // Build list of tokens\n", " List tokensList = new ArrayList();\n", " tokensList.add(\"[CLS]\"); \n", " tokensList.addAll(Arrays.asList(tokenizer.tokenize(text)));\n", " tokensList.add(\"[SEP]\");\n", "\n", " int[] ids = tokenizer.convert(tokensList.toArray(new String[0]));\n", "\n", " Tokens tokens = new Tokens();\n", "\n", " // input ids \n", " tokens.ids = Arrays.stream(ids).mapToLong(i -> i).toArray();\n", "\n", " // attention mask\n", " tokens.mask = new long[ids.length];\n", " Arrays.fill(tokens.mask, 1);\n", "\n", " // token type ids\n", " tokens.types = new long[ids.length];\n", " Arrays.fill(tokens.types, 0);\n", "\n", " return tokens;\n", " }\n", "}\n", "\n", "class Inference {\n", " private Tokenizer tokenizer;\n", " private OrtEnvironment env;\n", " private OrtSession session;\n", "\n", " public Inference(String model) throws Exception {\n", " this.tokenizer = new Tokenizer(\"bert/vocab.txt\");\n", " this.env = OrtEnvironment.getEnvironment();\n", " this.session = env.createSession(model, new OrtSession.SessionOptions());\n", " }\n", "\n", " public float[][] predict(String text) throws Exception {\n", " Tokens tokens = this.tokenizer.tokenize(text);\n", "\n", " Map inputs = new HashMap();\n", " 
inputs.put(\"input_ids\", OnnxTensor.createTensor(env, LongBuffer.wrap(tokens.ids), new long[]{1, tokens.ids.length}));\n", " inputs.put(\"attention_mask\", OnnxTensor.createTensor(env, LongBuffer.wrap(tokens.mask), new long[]{1, tokens.mask.length}));\n", " inputs.put(\"token_type_ids\", OnnxTensor.createTensor(env, LongBuffer.wrap(tokens.types), new long[]{1, tokens.types.length}));\n", "\n", " return (float[][])session.run(inputs).get(0).getValue();\n", " }\n", "}\n", "\n", "class Vectors {\n", " public static double similarity(float[] v1, float[] v2) {\n", " double dot = 0.0;\n", " double norm1 = 0.0;\n", " double norm2 = 0.0;\n", "\n", " for (int x = 0; x < v1.length; x++) {\n", " dot += v1[x] * v2[x];\n", " norm1 += Math.pow(v1[x], 2);\n", " norm2 += Math.pow(v2[x], 2);\n", " }\n", "\n", " return dot / (Math.sqrt(norm1) * Math.sqrt(norm2));\n", " }\n", "\n", " public static float[] softmax(float[] input) {\n", " double[] t = new double[input.length];\n", " double sum = 0.0;\n", "\n", " for (int x = 0; x < input.length; x++) {\n", " double val = Math.exp(input[x]);\n", " sum += val;\n", " t[x] = val;\n", " }\n", "\n", " float[] output = new float[input.length];\n", " for (int x = 0; x < output.length; x++) {\n", " output[x] = (float) (t[x] / sum);\n", " }\n", "\n", " return output;\n", " }\n", "}\n", "\n", "public class OnnxDemo {\n", " public static void main(String[] args) {\n", " try {\n", " if (args.length < 2) {\n", " Inference inference = new Inference(\"text-classify.onnx\");\n", "\n", " float[][] v1 = inference.predict(args[0]);\n", "\n", " System.out.println(Arrays.toString(Vectors.softmax(v1[0])));\n", " }\n", " else {\n", " Inference inference = new Inference(\"embeddings.onnx\");\n", " float[][] v1 = inference.predict(args[0]);\n", " float[][] v2 = inference.predict(args[1]);\n", "\n", " System.out.println(Vectors.similarity(v1[0], v2[0]));\n", " }\n", " }\n", " catch (Exception ex) {\n", " ex.printStackTrace();\n", " }\n", " }\n", "}" ], 
"execution_count": 42, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing src/main/java/OnnxDemo.java\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "qQuuXw97Z_I7" }, "source": [ "## Run Text Classification in Java with ONNX" ] }, { "cell_type": "code", "metadata": { "id": "hFXyH96gAZpu", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "efd5b783-b23a-407c-8577-c18e3a6cb984" }, "source": [ "!./gradlew -q --console=plain onnx --args='\"I am happy\"' 2> /dev/null\n", "!./gradlew -q --console=plain onnx --args='\"I am mad\"' 2> /dev/null" ], "execution_count": 43, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[3.1213084E-4, 0.99968785]\n", "\u001b[m[0.99912345, 8.7655196E-4]\n", "\u001b[m" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "pE3FSsAAaJHe" }, "source": [ "The command above tokenizes the input and runs inference with a text classification model previously created using a Java ONNX inference session. \n", "\n", "As a reminder, the text classification model is judging sentiment using two labels, 0 for negative to 1 for positive. The results above shows the probability of each label per text snippet." ] }, { "cell_type": "markdown", "metadata": { "id": "Bux8v0C4aDyP" }, "source": [ "## Build sentence embeddings and compare similarity in Java with ONNX" ] }, { "cell_type": "code", "metadata": { "id": "f6zE9VrwCcUa", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "988e59d0-943f-45b6-d37e-5fc1ebbbcefe" }, "source": [ "!./gradlew -q --console=plain onnx --args='\"I am happy\" \"I am glad\"' 2> /dev/null" ], "execution_count": 44, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.8298244656285757\n", "\u001b[m" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "0uepOZvJDOCB" }, "source": [ "The sentence embeddings model produces vectors that can be used to compare semantic similarity, -1 being most dissimilar and 1 being most similar. 
\n", "\n", "This is 100% Java, no API or remote calls, all within the JVM. Still think it's amazing!" ] }, { "cell_type": "markdown", "metadata": { "id": "faRu9EAJDUXw" }, "source": [ "# Rust\n", "\n", "Last but not least, let's try Rust. The following sections initialize a Rust build environment and write out the code necessary to run the ONNX inference." ] }, { "cell_type": "code", "metadata": { "id": "X3Xp1KLhelqw" }, "source": [ "%%capture\n", "import os\n", "\n", "os.chdir(\"/content\")\n", "!mkdir rust\n", "os.chdir(\"/content/rust\")\n", "\n", "# Copy ONNX models\n", "!cp ../text-classify.onnx .\n", "!cp ../embeddings.onnx .\n", "\n", "# Install Rust\n", "!apt-get install rustc cargo\n", "\n", "!mkdir -p src" ], "execution_count": 45, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "c7hz--Gne6Oa", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "d98ad709-5675-4193-e598-e6cbe12edda3" }, "source": [ "%%writefile Cargo.toml\n", "[package]\n", "name = \"onnx-test\"\n", "version = \"1.0.0\"\n", "description = \"\"\"\n", "ONNX Runtime Rust test\n", "\"\"\"\n", "edition = \"2018\"\n", "\n", "[dependencies]\n", "onnxruntime = { version = \"0.0.14\"}\n", "tokenizers = { version = \"0.13.1\"}" ], "execution_count": 46, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing Cargo.toml\n" ] } ] }, { "cell_type": "code", "metadata": { "id": "_8fdRvO1fFBm", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "53168684-12c2-46fc-e0e6-54c4c6d03cb1", "cellView": "form" }, "source": [ "#@title\n", "%%writefile src/main.rs\n", "use onnxruntime::environment::Environment;\n", "use onnxruntime::GraphOptimizationLevel;\n", "use onnxruntime::ndarray::{Array2, Axis};\n", "use onnxruntime::tensor::OrtOwnedTensor;\n", "\n", "use std::env;\n", "\n", "use tokenizers::tokenizer::{Result, Tokenizer};\n", "\n", "fn tokenize(text: String, inputs: usize) -> Vec> {\n", " // Load tokenizer from HF Hub\n", " let tokenizer = 
Tokenizer::from_pretrained(\"bert-base-uncased\", None).unwrap();\n", "\n", " // Encode input text\n", " let encoding = tokenizer.encode(text, true).unwrap();\n", "\n", " let v1: Vec = encoding.get_ids().to_vec().into_iter().map(|x| x as i64).collect();\n", " let v2: Vec = encoding.get_attention_mask().to_vec().into_iter().map(|x| x as i64).collect();\n", " let v3: Vec = encoding.get_type_ids().to_vec().into_iter().map(|x| x as i64).collect();\n", "\n", " let ids = Array2::from_shape_vec((1, v1.len()), v1).unwrap();\n", " let mask = Array2::from_shape_vec((1, v2.len()), v2).unwrap();\n", " let tids = Array2::from_shape_vec((1, v3.len()), v3).unwrap();\n", "\n", " return if inputs > 2 { vec![ids, mask, tids] } else { vec![ids, mask] };\n", "}\n", "\n", "fn predict(text: String, softmax: bool) -> Vec {\n", " // Start onnx session\n", " let environment = Environment::builder()\n", " .with_name(\"test\")\n", " .build().unwrap();\n", "\n", " // Derive model path\n", " let model = if softmax { \"text-classify.onnx\" } else { \"embeddings.onnx\" };\n", "\n", " let mut session = environment\n", " .new_session_builder().unwrap()\n", " .with_optimization_level(GraphOptimizationLevel::Basic).unwrap()\n", " .with_number_threads(1).unwrap()\n", " .with_model_from_file(model).unwrap();\n", "\n", " let inputs = tokenize(text, session.inputs.len());\n", "\n", " // Run inference and print result\n", " let outputs: Vec> = session.run(inputs).unwrap();\n", " let output: &OrtOwnedTensor = &outputs[0];\n", "\n", " let probabilities: Vec;\n", " if softmax {\n", " probabilities = output\n", " .softmax(Axis(1))\n", " .iter()\n", " .copied()\n", " .collect::>();\n", " }\n", " else {\n", " probabilities= output\n", " .iter()\n", " .copied()\n", " .collect::>();\n", " }\n", "\n", " return probabilities;\n", "}\n", "\n", "fn similarity(v1: &Vec, v2: &Vec) -> f64 {\n", " let mut dot = 0.0;\n", " let mut norm1 = 0.0;\n", " let mut norm2 = 0.0;\n", "\n", " for x in 0..v1.len() {\n", " dot += 
v1[x] * v2[x];\n", " norm1 += v1[x].powf(2.0);\n", " norm2 += v2[x].powf(2.0);\n", " }\n", "\n", " return dot as f64 / (norm1.sqrt() * norm2.sqrt()) as f64\n", "}\n", "\n", "fn main() -> Result<()> {\n", " // Tokenize input string\n", " let args: Vec = env::args().collect();\n", "\n", " if args.len() <= 2 {\n", " let v1 = predict(args[1].to_string(), true);\n", " println!(\"{:?}\", v1);\n", " }\n", " else {\n", " let v1 = predict(args[1].to_string(), false);\n", " let v2 = predict(args[2].to_string(), false);\n", " println!(\"{:?}\", similarity(&v1, &v2));\n", " }\n", "\n", " Ok(())\n", "}" ], "execution_count": 47, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing src/main.rs\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "OdfQFY-MiA-n" }, "source": [ "## Run Text Classification in Rust with ONNX" ] }, { "cell_type": "code", "metadata": { "id": "b0ymX4ftgWcT", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "84b42d3c-82d4-46fc-fb84-94967bd5330f" }, "source": [ "!cargo run \"I am happy\" 2> /dev/null\n", "!cargo run \"I am mad\" 2> /dev/null" ], "execution_count": 48, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[0.00030939875, 0.9996906]\n", "[0.99912345, 0.0008765513]\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "NKccz6bBiIgW" }, "source": [ "The command above tokenizes the input and runs inference with a text classification model previously created using a Rust ONNX inference session. \n", "\n", "As a reminder, the text classification model is judging sentiment using two labels, 0 for negative and 1 for positive. The results above show the probability of each label per text snippet." 
] }, { "cell_type": "markdown", "metadata": { "id": "1D1kN0yNiEg7" }, "source": [ "## Build sentence embeddings and compare similarity in Rust with ONNX" ] }, { "cell_type": "code", "metadata": { "id": "A9p6F_ODhenH", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "b43ad47e-e1f3-4748-d854-a0dc2024b780" }, "source": [ "!cargo run \"I am happy\" \"I am glad\" 2> /dev/null" ], "execution_count": 49, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.8298246060854143\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "TQ7Wvn0OiRr4" }, "source": [ "The sentence embeddings model produces vectors that can be used to compare semantic similarity, -1 being most dissimilar and 1 being most similar. \n", "\n", "Once again, this is 100% Rust, no API or remote calls. And yes, still think it's amazing!" ] }, { "cell_type": "markdown", "metadata": { "id": "-_FNKUWtjLsO" }, "source": [ "# Wrapping up\n", "\n", "This notebook covered how to export models to ONNX using txtai. These models were then run in Python, JavaScript, Java and Rust. Golang was also evaluated but there doesn't currently appear to be a stable enough ONNX runtime available. \n", "\n", "This method provides a way to train and run machine learning models using a number of programming languages on a number of platforms.\n", "\n", "The following is a non-exhaustive list of use cases. 
\n", "\n", "* Build locally executed models for mobile/edge devices\n", "* Run models with Java/JavaScript/Rust development stacks when teams prefer not to add Python to the mix\n", "* Export models to ONNX for Python inference to improve CPU performance and/or reduce the number of software dependencies" ] } ] } ================================================ FILE: examples/19_Train_a_QA_model.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Train a QA model\n", "\n", "The [Hugging Face Model Hub](https://huggingface.co/models) has a wide range of models that can handle many tasks. While these models perform well, the best performance is often found when fine-tuning a model with task-specific data. \n", "\n", "Hugging Face provides a [number of full-featured examples](https://github.com/huggingface/transformers/tree/master/examples) to assist with training task-specific models. When building models from the command line, these scripts are a great way to get started.\n", "\n", "txtai provides a training pipeline that can be used to train new models programmatically using the Transformers Trainer framework.\n", "\n", "This example trains a small QA model and then further fine-tunes it with a couple of new examples (few-shot learning)." ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." 
] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline-train] datasets pandas" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "r6nmtieHdMfr" }, "source": [ "# Train a SQuAD 2.0 Model\n", "\n", "The first step is training a SQuAD 2.0 model. SQuAD is a question-answer dataset that poses a question with a context along with the identified answer. It's also possible to not have an answer. See the [SQuAD dataset website](https://rajpurkar.github.io/SQuAD-explorer/) for more information.\n", "\n", "We'll use a tiny Bert model with a portion of SQuAD 2.0 for efficiency purposes." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 297 }, "id": "pg9-tUxEdRfk", "outputId": "06195c45-4b39-46e5-a462-af566f437ade" }, "source": [ "from datasets import load_dataset\n", "from txtai.pipeline import HFTrainer\n", "\n", "ds = load_dataset(\"squad_v2\")\n", "\n", "trainer = HFTrainer()\n", "trainer(\"google/bert_uncased_L-2_H-128_A-2\", ds[\"train\"].select(range(3000)), task=\"question-answering\", output_dir=\"bert-tiny-squadv2\")\n", "print(\"Training complete\")" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Reusing dataset squad_v2 (/root/.cache/huggingface/datasets/squad_v2/squad_v2/2.0.0/09187c73c1b837c95d9a249cd97c2c3f1cebada06efe667b4427714b27639b1d)\n", "Loading cached processed dataset at /root/.cache/huggingface/datasets/squad_v2/squad_v2/2.0.0/09187c73c1b837c95d9a249cd97c2c3f1cebada06efe667b4427714b27639b1d/cache-73bbe029cf3366fc.arrow\n", "Some weights of the model checkpoint at google/bert_uncased_L-2_H-128_A-2 were not used when initializing BertForQuestionAnswering: ['cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 
'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']\n", "- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at google/bert_uncased_L-2_H-128_A-2 and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "output_type": "display_data", "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [1131/1131 00:50, Epoch 3/3]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
5004.501800
10003.875900

" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Training complete\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "zZtHxNSwFNGC" }, "source": [ "# Fine-tune with new data\n", "\n", "Next we'll add a few additional examples. Fine-tuning a QA model will help with framing a certain type of question or improve performance for a specific use-case. \n", "\n", "For smaller models with a narrow use case, this helps the model zero in on the types of questions that are to be asked. In this case, we want to tell the model exactly the types of information we're looking for when asking for ingredients. This will help improve confidence in the answers the model is generating.\n" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 75 }, "id": "JBeScS5dFNeW", "outputId": "13596524-b476-4f55-f430-d25eeda9301f" }, "source": [ "# Training data\n", "data = [\n", " {\"question\": \"What ingredient?\", \"context\": \"Pour 1 can whole tomatoes\", \"answers\": \"tomatoes\"},\n", " {\"question\": \"What ingredient?\", \"context\": \"Dice 1 yellow onion\", \"answers\": \"onion\"},\n", " {\"question\": \"What ingredient?\", \"context\": \"Cut 1 red pepper\", \"answers\": \"pepper\"},\n", " {\"question\": \"What ingredient?\", \"context\": \"Peel and dice 1 clove garlic\", \"answers\": \"garlic\"},\n", " {\"question\": \"What ingredient?\", \"context\": \"Put 1/2 lb beef\", \"answers\": \"beef\"},\n", "]\n", "\n", "model, tokenizer = trainer(\"bert-tiny-squadv2\", data, task=\"question-answering\", num_train_epochs=10)" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [10/10 00:00, Epoch 10/10]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss

" ], "text/plain": [ "" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "metadata": { "id": "V7nAl3WtkBNK" }, "source": [ "# Test the model\n", "\n", "Now we're ready to test the results! The following sections run a question against the original model only trained with SQuAD 2.0 and the further fine-tuned model." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "46fMiJrAIBu4", "outputId": "fb92ca0a-d433-486f-b61c-054f4e4a9b36" }, "source": [ "from transformers import pipeline\n", "\n", "questions = pipeline(\"question-answering\", model=\"bert-tiny-squadv2\")\n", "questions(\"What ingredient?\", \"Peel and dice 1 shallot\")" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'answer': 'dice 1 shallot',\n", " 'end': 23,\n", " 'score': 0.05128436163067818,\n", " 'start': 9}" ] }, "metadata": {}, "execution_count": 57 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nWQMRQm0NwdN", "outputId": "a1f15b5c-1cf8-4fe5-daa8-e19adc1700e1" }, "source": [ "from transformers import pipeline\n", "\n", "questions = pipeline(\"question-answering\", model=model.to(\"cpu\"), tokenizer=tokenizer)\n", "questions(\"What ingredient?\", \"Peel and dice 1 shallot\")" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'answer': 'shallot', 'end': 23, 'score': 0.13187439739704132, 'start': 16}" ] }, "metadata": {}, "execution_count": 58 } ] }, { "cell_type": "markdown", "metadata": { "id": "wJoYksLbkZTJ" }, "source": [ "See how the results are more confident and have a better answer. This method allows using a smaller model with a narrow set of functionality with the upside of increased speed. Give it a try with your own data!" 
] } ] } ================================================ FILE: examples/20_Extractive_QA_to_build_structured_data.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "vwELCooy4ljr" }, "source": [ "# Extractive QA to build structured data\n", "\n", "Traditional ETL/data parsing systems establish rules to extract information of interest. Regular expressions, string parsing and similar methods define fixed rules. This works in many cases but what if you are working with unstructured data containing numerous variations? The rules can be cumbersome and hard to maintain over time.\n", "\n", "This notebook uses machine learning and extractive question-answering (QA) to utilize the vast knowledge built into large language models. These models have been trained on extremely large datasets, learning the many variations of natural language. " ] }, { "cell_type": "markdown", "metadata": { "id": "ew7orE2O441o" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ] }, { "cell_type": "code", "metadata": { "id": "LPQTb25tASIG" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline-train]" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "_YnqorRKAbLu" }, "source": [ "# Train a QA model with few-shot learning\n", "\n", "The code below trains a new QA model using a few examples. These examples give the model hints on the type of questions that will be asked and the type of answers to look for. It doesn't take a lot of examples to do this as shown below." 
] }, { "cell_type": "code", "metadata": { "id": "OUc9gqTyAYnm", "colab": { "base_uri": "https://localhost:8080/", "height": 75 }, "outputId": "7e7f93c7-5ad5-46a6-d04d-c7450d246f6c" }, "source": [ "import pandas as pd\n", "from txtai.pipeline import HFTrainer, Questions, Labels\n", "\n", "# Training data for few-shot learning\n", "data = [\n", " {\"question\": \"What is the url?\",\n", " \"context\": \"Faiss (https://github.com/facebookresearch/faiss) is a library for efficient similarity search.\",\n", " \"answers\": \"https://github.com/facebookresearch/faiss\"},\n", " {\"question\": \"What is the url\", \"context\": \"The last release was Wed Sept 25 2021\", \"answers\": None},\n", " {\"question\": \"What is the date?\", \"context\": \"The last release was Wed Sept 25 2021\", \"answers\": \"Wed Sept 25 2021\"},\n", " {\"question\": \"What is the date?\", \"context\": \"The order total comes to $44.33\", \"answers\": None},\n", " {\"question\": \"What is the amount?\", \"context\": \"The order total comes to $44.33\", \"answers\": \"$44.33\"},\n", " {\"question\": \"What is the amount?\", \"context\": \"The last release was Wed Sept 25 2021\", \"answers\": None},\n", "]\n", "\n", "# Fine-tune QA model\n", "trainer = HFTrainer()\n", "model, tokenizer = trainer(\"distilbert-base-cased-distilled-squad\", data, task=\"question-answering\")" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [3/3 00:03, Epoch 3/3]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss

" ], "text/plain": [ "" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "metadata": { "id": "1hzPnmrTaUjH" }, "source": [ "# Parse data into a structured table\n", "\n", "The next section takes a series of rows of text and runs a set of questions against each row. The answers are then used to build a pandas DataFrame." ] }, { "cell_type": "code", "metadata": { "id": "4X5z3UjnAGe7", "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "outputId": "41015023-c7b7-4515-ad30-f1b2d8143ea2" }, "source": [ "# Input data\n", "context = [\"Released on 6/03/2021\",\n", " \"Release delayed until the 11th of August\",\n", " \"Documentation can be found here: neuml.github.io/txtai\",\n", " \"The stock price fell to three dollars\",\n", " \"Great day: closing price for March 23rd is $33.11, for details - https://finance.google.com\"]\n", "\n", "# Define column queries\n", "queries = [\"What is the url?\", \"What is the date?\", \"What is the amount?\"]\n", "\n", "# Extract fields\n", "questions = Questions(path=(model, tokenizer), gpu=True)\n", "results = [questions([question] * len(context), context) for question in queries]\n", "results.append(context)\n", "\n", "# Load into DataFrame\n", "pd.DataFrame(list(zip(*results)), columns=[\"URL\", \"Date\", \"Amount\", \"Text\"])" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "\n", "

\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
URLDateAmountText
0None6/03/2021NoneReleased on 6/03/2021
1None11th of AugustNoneRelease delayed until the 11th of August
2neuml.github.io/txtaiNoneNoneDocumentation can be found here: neuml.github....
3NoneNonethree dollarsThe stock price fell to three dollars
4https://finance.google.comMarch 23rd$33.11Great day: closing price for March 23rd is $33...
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ], "text/plain": [ " URL ... Text\n", "0 None ... Released on 6/03/2021\n", "1 None ... Release delayed until the 11th of August\n", "2 neuml.github.io/txtai ... Documentation can be found here: neuml.github....\n", "3 None ... The stock price fell to three dollars\n", "4 https://finance.google.com ... Great day: closing price for March 23rd is $33...\n", "\n", "[5 rows x 4 columns]" ] }, "metadata": {}, "execution_count": 7 } ] }, { "cell_type": "markdown", "metadata": { "id": "mY1Le-pve5yi" }, "source": [ "# Add additional columns\n", "\n", "This method can be combined with other models to categorize, group or otherwise derive additional columns. The code below derives an additional sentiment column." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 479 }, "id": "0kyJUcrKe43a", "outputId": "cea0d481-3315-4ec8-a7f9-8a9c4942db10" }, "source": [ "# Add sentiment\n", "labels = Labels(path=\"distilbert-base-uncased-finetuned-sst-2-english\", dynamic=False)\n", "labels = [\"POSITIVE\" if x[0][0] == 1 else \"NEGATIVE\" for x in labels(context)]\n", "results.insert(len(results) - 1, labels)\n", "\n", "# Load into DataFrame\n", "pd.DataFrame(list(zip(*results)), columns=[\"URL\", \"Date\", \"Amount\", \"Sentiment\", \"Text\"])" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
URLDateAmountSentimentText
0None6/03/2021NonePOSITIVEReleased on 6/03/2021
1None11th of AugustNoneNEGATIVERelease delayed until the 11th of August
2neuml.github.io/txtaiNoneNoneNEGATIVEDocumentation can be found here: neuml.github....
3NoneNonethree dollarsNEGATIVEThe stock price fell to three dollars
4https://finance.google.comMarch 23rd$33.11POSITIVEGreat day: closing price for March 23rd is $33...
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ], "text/plain": [ " URL ... Text\n", "0 None ... Released on 6/03/2021\n", "1 None ... Release delayed until the 11th of August\n", "2 neuml.github.io/txtai ... Documentation can be found here: neuml.github....\n", "3 None ... The stock price fell to three dollars\n", "4 https://finance.google.com ... Great day: closing price for March 23rd is $33...\n", "\n", "[5 rows x 5 columns]" ] }, "metadata": {}, "execution_count": 8 } ] } ] } ================================================ FILE: examples/21_Export_and_run_other_machine_learning_models.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "accelerator": "GPU", "colab": { "name": "21 - Export and run other machine learning models", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Export and run other machine learning models\n", "\n", "txtai primarily has support for [Hugging Face Transformers](https://github.com/huggingface/transformers) and [ONNX](https://github.com/microsoft/onnxruntime) models. This enables txtai to hook into the rich model framework available in Python, export this functionality via the API to other languages (JavaScript, Java, Go, Rust) and even export and natively load models with ONNX.\n", "\n", "What about other machine learning frameworks? Say we have an existing TF-IDF + Logistic Regression model that has been well tuned. Can this model be exported to ONNX and used in txtai for labeling and similarity queries? Or what about a simple PyTorch text classifier? Yes, both of these can be done!\n", "\n", "With the [onnxmltools](https://github.com/onnx/onnxmltools) library, traditional models from [scikit-learn](https://scikit-learn.org/stable/), [XGBoost](https://xgboost.readthedocs.io/en/latest/) and others can be exported to ONNX and loaded with txtai. 
Additionally, Hugging Face's trainer module can train generic PyTorch modules. This notebook will walk through all these examples.\n", "\n" ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline,similarity] datasets" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "r6nmtieHdMfr" }, "source": [ "# Train a TF-IDF + Logistic Regression model\n", "\n", "For this example, we'll load the emotion dataset from Hugging Face datasets and build a TF-IDF + Logistic Regression model with scikit-learn.\n", "\n", "The emotion dataset has the following labels:\n", "\n", "- sadness (0)\n", "- joy (1)\n", "- love (2)\n", "- anger (3)\n", "- fear (4)\n", "- surprise (5)\n" ] }, { "cell_type": "code", "metadata": { "id": "pg9-tUxEdRfk" }, "source": [ "from datasets import load_dataset\n", "\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.pipeline import Pipeline\n", "\n", "ds = load_dataset(\"emotion\")\n", "\n", "# Train the model\n", "pipeline = Pipeline([\n", " ('tfidf', TfidfVectorizer()),\n", " ('lr', LogisticRegression(max_iter=250))\n", "])\n", "\n", "pipeline.fit(ds[\"train\"][\"text\"], ds[\"train\"][\"label\"])\n", "\n", "# Determine accuracy on validation set\n", "results = pipeline.predict(ds[\"validation\"][\"text\"])\n", "labels = ds[\"validation\"][\"label\"]\n", "\n", "results = [results[x] == label for x, label in enumerate(labels)]\n", "print(\"Accuracy =\", sum(results) / len(ds[\"validation\"]))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Using custom data configuration default\n", "Reusing dataset emotion 
(/root/.cache/huggingface/datasets/emotion/default/0.0.0/348f63ca8e27b3713b6c04d723efe6d824a56fb3d1449794716c0f0296072705)\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Accuracy = 0.8595\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "49jZD4jQgdBg" }, "source": [ "86% accuracy - not too bad! While we all get caught up in deep learning and advanced methods, good ole TF-IDF + Logistic Regression is still a solid performer and runs much faster. If that level of accuracy works, no reason to overcomplicate things." ] }, { "cell_type": "markdown", "metadata": { "id": "zZtHxNSwFNGC" }, "source": [ "# Export and load with txtai\n", "\n", "The next section exports this model to ONNX and shows how the model can be used for similarity queries. " ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JBeScS5dFNeW", "outputId": "e1b5cbf4-87dd-4598-e7ee-14e36cf31a7c" }, "source": [ "from txtai.pipeline import Labels, MLOnnx, Similarity\n", "\n", "def tokenize(inputs, **kwargs):\n", " if isinstance(inputs, str):\n", " inputs = [inputs]\n", "\n", " return {\"input_ids\": [[x] for x in inputs]}\n", "\n", "def query(model, tokenizer, multilabel=False):\n", " # Load models into similarity pipeline\n", " similarity = Similarity((model, tokenizer), dynamic=False)\n", "\n", " # Add labels to model\n", " similarity.pipeline.model.config.id2label = {0: \"sadness\", 1: \"joy\", 2: \"love\", 3: \"anger\", 4: \"fear\", 5: \"surprise\"}\n", " similarity.pipeline.model.config.label2id = dict((v, k) for k, v in similarity.pipeline.model.config.id2label.items())\n", "\n", " inputs = [\"that caught me off guard\", \"I didn t see that coming\", \"i feel bad\", \"What a wonderful goal!\"]\n", " scores = similarity(\"joy\", inputs, multilabel)\n", " for uid, score in scores[:5]:\n", " print(inputs[uid], score)\n", "\n", "# Export to ONNX\n", "onnx = MLOnnx()\n", "model = onnx(pipeline)\n", "\n", "# Create labels pipeline 
using scikit-learn ONNX model\n", "sklabels = Labels((model, tokenize), dynamic=False)\n", "\n", "# Add labels to model\n", "sklabels.pipeline.model.config.id2label = {0: \"sadness\", 1: \"joy\", 2: \"love\", 3: \"anger\", 4: \"fear\", 5: \"surprise\"}\n", "sklabels.pipeline.model.config.label2id = dict((v, k) for k, v in sklabels.pipeline.model.config.id2label.items())\n", "\n", "# Run test query using model\n", "query(model, tokenize, None)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "What a wonderful goal! 0.909473717212677\n", "I didn t see that coming 0.47113093733787537\n", "that caught me off guard 0.42067453265190125\n", "i feel bad 0.019547615200281143\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "d-y8gFJwCwKN" }, "source": [ "txtai can use a standard text classification model for similarity queries, where the label(s) are a list of fixed queries. The output above shows the best results for the query \"joy\"." ] }, { "cell_type": "markdown", "metadata": { "id": "cbqwX7GgKBkf" }, "source": [ "# Train a PyTorch model\n", "\n", "The next section defines a simple PyTorch text classifier. The transformers library has a trainer package that supports training PyTorch models, assuming some standard conventions/naming is used. 
" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 239 }, "id": "k8PkTlBLKBTy", "outputId": "4f48bfb2-2f16-45e3-d3e6-f1a2a747fd09" }, "source": [ "# Set predictable seeds\n", "import os\n", "import random\n", "import torch\n", "\n", "import numpy as np\n", "\n", "from torch import nn\n", "from torch.nn import CrossEntropyLoss\n", "from transformers import AutoConfig, AutoTokenizer\n", "\n", "from txtai.models import Registry\n", "from txtai.pipeline import HFTrainer\n", "\n", "from transformers.modeling_outputs import SequenceClassifierOutput\n", "\n", "def seed(seed=42):\n", " random.seed(seed)\n", " os.environ['PYTHONHASHSEED'] = str(seed)\n", " np.random.seed(seed)\n", " torch.manual_seed(seed)\n", " torch.cuda.manual_seed(seed)\n", " torch.backends.cudnn.deterministic = True\n", "\n", "class Simple(nn.Module):\n", " def __init__(self, vocab, dimensions, labels):\n", " super().__init__()\n", "\n", " self.config = AutoConfig.from_pretrained(\"bert-base-uncased\")\n", " self.labels = labels\n", "\n", " self.embedding = nn.EmbeddingBag(vocab, dimensions)\n", " self.classifier = nn.Linear(dimensions, labels)\n", " self.init_weights()\n", "\n", " def init_weights(self):\n", " initrange = 0.5\n", " self.embedding.weight.data.uniform_(-initrange, initrange)\n", " self.classifier.weight.data.uniform_(-initrange, initrange)\n", " self.classifier.bias.data.zero_()\n", "\n", " def forward(self, input_ids=None, labels=None, **kwargs):\n", " embeddings = self.embedding(input_ids)\n", " logits = self.classifier(embeddings)\n", "\n", " loss = None\n", " if labels is not None:\n", " loss_fct = CrossEntropyLoss()\n", " loss = loss_fct(logits.view(-1, self.labels), labels.view(-1))\n", "\n", " return SequenceClassifierOutput(\n", " loss=loss,\n", " logits=logits,\n", " )\n", "\n", "# Set seed for reproducibility\n", "seed()\n", "\n", "# Define model\n", "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n", 
"model = Simple(tokenizer.vocab_size, 128, len(ds[\"train\"].unique(\"label\")))\n", "\n", "# Train model\n", "train = HFTrainer()\n", "model, tokenizer = train((model, tokenizer), ds[\"train\"], per_device_train_batch_size=8, learning_rate=1e-3, num_train_epochs=15, logging_steps=10000)\n", "\n", "# Register custom model to fully support pipelines\n", "Registry.register(model)\n", "\n", "# Create labels pipeline using PyTorch model\n", "thlabels = Labels((model, tokenizer), dynamic=False)\n", "\n", "# Determine accuracy on validation set\n", "results = [row[\"label\"] == thlabels(row[\"text\"])[0][0] for row in ds[\"validation\"]]\n", "print(\"Accuracy = \", sum(results) / len(ds[\"validation\"]))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Loading cached processed dataset at /root/.cache/huggingface/datasets/emotion/default/0.0.0/348f63ca8e27b3713b6c04d723efe6d824a56fb3d1449794716c0f0296072705/cache-a983327c4471f5aa.arrow\n" ] }, { "output_type": "display_data", "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [30000/30000 02:28, Epoch 15/15]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
100001.017600
200000.286200
300000.152500

" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Accuracy = 0.883\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "nHQoJnrj60Pz" }, "source": [ "88% accuracy this time. Pretty good for such a simple network and something that could definitely be improved upon. \n", "\n", "Once again let's run similarity queries using this model." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "W5_NDInF5lFN", "outputId": "38a2c126-63e9-40dc-f309-a29826b5b937" }, "source": [ "query(model, tokenizer)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "What a wonderful goal! 1.0\n", "that caught me off guard 0.9998751878738403\n", "I didn t see that coming 0.7328283190727234\n", "i feel bad 5.2972134609891875e-19\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "KmcsdIltDTwj" }, "source": [ "The best and worst matches are the same as with the scikit-learn model, though the two middle results swap places and the scores vary, which is expected given this is a completely different model." ] }, { "cell_type": "markdown", "metadata": { "id": "-fNTi2jb68rv" }, "source": [ "# Pooled embeddings\n", "\n", "The PyTorch model above consists of an embeddings layer with a linear classifier on top of it. What if we take that embeddings layer and use it for similarity queries? Let's give it a try." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "J1yhfHKC7N7L", "outputId": "11567948-769a-44df-9057-9fe9837a73dd" }, "source": [ "from txtai.embeddings import Embeddings\n", "\n", "class SimpleEmbeddings(nn.Module):\n", " def __init__(self, embeddings):\n", " super().__init__()\n", "\n", " self.embeddings = embeddings\n", "\n", " def forward(self, input_ids=None, **kwargs):\n", " return (self.embeddings(input_ids),)\n", "\n", "embeddings = Embeddings({\"method\": \"pooling\", \"path\": SimpleEmbeddings(model.embedding), \"tokenizer\": \"bert-base-uncased\"})\n", "print(embeddings.similarity(\"mad\", [\"Glad you found it\", \"Happy to see you\", \"I'm angry\"]))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[(2, 0.8323876857757568), (1, -0.11010512709617615), (0, -0.16152513027191162)]\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "0kTUEIcmBNuV" }, "source": [ "Definitely looks like the embeddings have stored knowledge. Could these embeddings be good enough to build a semantic search index, especially for sentiment based data, given the training dataset? Possibly. It certainly would run faster than a standard transformer model (see below). " ] }, { "cell_type": "markdown", "metadata": { "id": "V7nAl3WtkBNK" }, "source": [ "# Train a transformer model and compare accuracy/speed\n", "\n", "Let's train a standard transformer sequence classifier and compare the accuracy/speed between the two. 
" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 274 }, "id": "46fMiJrAIBu4", "outputId": "f0512cf8-3bc2-41ed-caff-e1541403f2a5" }, "source": [ "train = HFTrainer()\n", "model, tokenizer = train(\"microsoft/xtremedistil-l6-h384-uncased\", ds[\"train\"], logging_steps=2000)\n", "\n", "tflabels = Labels((model, tokenizer), dynamic=False)\n", "\n", "# Determine accuracy on validation set\n", "results = [row[\"label\"] == tflabels(row[\"text\"])[0][0] for row in ds[\"validation\"]]\n", "print(\"Accuracy = \", sum(results) / len(ds[\"validation\"]))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Loading cached processed dataset at /root/.cache/huggingface/datasets/emotion/default/0.0.0/348f63ca8e27b3713b6c04d723efe6d824a56fb3d1449794716c0f0296072705/cache-98b7ef31bf6ca944.arrow\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/xtremedistil-l6-h384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "output_type": "display_data", "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [6000/6000 07:13, Epoch 3/3]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
20000.635500
40000.281700
60000.192600

" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Accuracy = 0.926\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "ycvozGzPmlbS" }, "source": [ "As expected, the accuracy is better. The model above is a distilled model and even better accuracy can be obtained with a model like \"roberta-base\" with the tradeoff being increased training/inference time. \n", "\n", "Speaking of speed, let's compare the speed of these models." ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nWQMRQm0NwdN", "outputId": "4a49406c-b4eb-46b1-edab-de01c15fdccb" }, "source": [ "import time\n", "\n", "# Test inputs\n", "inputs = ds[\"test\"][\"text\"]\n", "print(\"Testing speed of %d items\" % len(inputs))\n", "\n", "start = time.time()\n", "r1 = sklabels(inputs, multilabel=None)\n", "print(\"TF-IDF + Logistic Regression time =\", time.time() - start)\n", "\n", "start = time.time()\n", "r2 = thlabels(inputs)\n", "print(\"PyTorch time =\", time.time() - start)\n", "\n", "start = time.time()\n", "r3 = tflabels(inputs)\n", "print(\"Transformers time =\", time.time() - start, \"\\n\")\n", "\n", "# Compare model results\n", "for x in range(5):\n", " print(\"index: %d\" % x)\n", " print(r1[x][0])\n", " print(r2[x][0])\n", " print(r3[x][0], \"\\n\")" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Testing speed of 2000 items\n", "TF-IDF + Logistic Regression time = 1.0483319759368896\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "PyTorch time = 2.0001697540283203\n", "Transformers time = 13.71584439277649 \n", "\n", "index: 0\n", "(0, 0.7258279323577881)\n", "(0, 1.0)\n", "(0, 0.998375654220581) \n", "\n", "index: 1\n", "(0, 0.854256272315979)\n", "(0, 1.0)\n", "(0, 0.9983494281768799) \n", "\n", "index: 2\n", "(0, 0.6306578516960144)\n", "(0, 0.9999700784683228)\n", "(0, 0.9982945322990417) \n", "\n", "index: 3\n", 
"(1, 0.554378092288971)\n", "(1, 0.9998960494995117)\n", "(1, 0.99846351146698) \n", "\n", "index: 4\n", "(0, 0.8961835503578186)\n", "(0, 1.0)\n", "(0, 0.9984095692634583) \n", "\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "1YMTyqIWDiOB" }, "source": [ "# Wrapping up\n", "\n", "This notebook showed how frameworks outside of Transformers and ONNX can be used as models in txtai.\n", "\n", "As seen in the section above, TF-IDF + Logistic Regression is roughly 13 times faster than a distilled Transformers model. A simple PyTorch network is roughly 7 times faster. Depending on your accuracy requirements, it may make sense to use a simpler model to get better runtime performance." ] } ] } ================================================ FILE: examples/22_Transform_tabular_data_with_composable_workflows.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "accelerator": "GPU", "colab": { "name": "22 - Transform tabular data with composable workflows", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Transform tabular data with composable workflows\n", "\n", "txtai has support for processing both unstructured and structured data. Structured or tabular data is grouped into rows and columns. This can be a spreadsheet, an API call that returns JSON or XML or even a list of key-value pairs.\n", "\n", "This notebook will walk through examples on how to use workflows with the tabular pipeline to transform and index structured data.\n" ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. We will install the api, pipeline and workflow optional extras packages. 
" ] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[api,pipeline,workflow] sacremoses" ], "execution_count": 66, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "NSYrP0hjtR_E" }, "source": [ "# CSV Workflow\n", "\n", "The first example will transform and index a CSV file. The [COVID-19 Open Research Dataset](https://allenai.org/data/cord-19) (CORD-19) is a repository of medical articles covering COVID-19. This workflow reads the input CSV and builds a semantic search index.\n", "\n", "The first step is downloading the dataset locally." ] }, { "cell_type": "code", "metadata": { "id": "BoPJIKWoTibk" }, "source": [ "%%capture\n", "# Get CORD-19 metadata file\n", "!wget https://ai2-semanticscholar-cord-19.s3-us-west-2.amazonaws.com/2021-11-01/metadata.csv\n", "!head -1 metadata.csv > input.csv\n", "!tail -10000 metadata.csv >> input.csv" ], "execution_count": 67, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "Q1ivX4eBuU8T" }, "source": [ "The next section creates a simple workflow consisting of a tabular pipeline. The tabular pipeline builds a list of (id, text, tag) tuples that can be easily loaded into an Embeddings index. For this example, we'll use the `url` column as the id and the `title` column as the text column. The textcolumns parameter takes a list of columns to support indexing text content from multiple columns. \n", "\n", "The file input.csv is processed and the first 5 rows are shown." 
] }, { "cell_type": "code", "metadata": { "id": "-pi2QU3TSlM_", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "a30ffeb2-1d85-467b-a0a3-134c8cbac19f" }, "source": [ "from txtai.pipeline import Tabular\n", "from txtai.workflow import Task, Workflow\n", "\n", "# Create tabular instance mapping input.csv fields\n", "tabular = Tabular(\"url\", [\"title\"])\n", "\n", "# Create workflow\n", "workflow = Workflow([Task(tabular)])\n", "\n", "# Print 5 rows of input.csv via workflow\n", "list(workflow([\"input.csv\"]))[:5]" ], "execution_count": 68, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[('https://doi.org/10.1016/j.cmpb.2021.106469; https://www.ncbi.nlm.nih.gov/pubmed/34715516/',\n", " 'Computer simulation of the dynamics of a spatial susceptible-infected-recovered epidemic model with time delays in transmission and treatment.',\n", " None),\n", " ('https://www.ncbi.nlm.nih.gov/pubmed/34232002/; https://doi.org/10.36849/jdd.5544',\n", " 'Understanding the Potential Role of Abrocitinib in the Time of SARS-CoV-2',\n", " None),\n", " ('https://doi.org/10.1186/1471-2458-8-42; https://www.ncbi.nlm.nih.gov/pubmed/18234083/',\n", " \"Can the concept of Health Promoting Schools help to improve students' health knowledge and practices to combat the challenge of communicable diseases: Case study in Hong Kong?\",\n", " None),\n", " ('https://www.ncbi.nlm.nih.gov/pubmed/32983582/; https://www.sciencedirect.com/science/article/pii/S2095809920302514?v=s5; https://api.elsevier.com/content/article/pii/S2095809920302514; https://doi.org/10.1016/j.eng.2020.07.018',\n", " 'Buying time for an effective epidemic response: The impact of a public holiday for outbreak control on COVID-19 epidemic spread',\n", " None),\n", " ('https://doi.org/10.1093/pcmedi/pbab016',\n", " 'The SARS-CoV-2 spike L452R-E484Q variant in the Indian B.1.617 strain showed significant reduction in the neutralization activity of immune sera',\n", " None)]" ] }, 
"metadata": {}, "execution_count": 68 } ] }, { "cell_type": "markdown", "metadata": { "id": "UYYKnwNhu0hv" }, "source": [ "Next, we take the workflow output, build an Embeddings index and run a search query." ] }, { "cell_type": "code", "metadata": { "id": "G7M34puLWeZm", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "0e9e771b-d53d-4c4e-8ac8-257c33992f66" }, "source": [ "from txtai.embeddings import Embeddings\n", "\n", "# Embeddings with sentence-transformers backend\n", "embeddings = Embeddings({\"method\": \"transformers\", \"path\": \"sentence-transformers/paraphrase-mpnet-base-v2\"})\n", "\n", "# Index subset of CORD-19 data\n", "data = list(workflow([\"input.csv\"]))\n", "embeddings.index(data)\n", "\n", "for uid, _ in embeddings.search(\"insulin\"):\n", " title = [text for url, text, _ in data if url == uid][0]\n", " print(title, uid)" ], "execution_count": 69, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Importance of diabetes management during the COVID-19 pandemic. https://doi.org/10.1080/00325481.2021.1978704; https://www.ncbi.nlm.nih.gov/pubmed/34602003/\n", "Position Statement on How to Manage Patients with Diabetes and COVID-19 https://www.ncbi.nlm.nih.gov/pubmed/33442169/; https://doi.org/10.15605/jafes.035.01.03\n", "Successful blood glucose management of a severe COVID-19 patient with diabetes: A case report https://www.ncbi.nlm.nih.gov/pubmed/32590779/; https://doi.org/10.1097/md.0000000000020844\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "vPQv7GJSu9Uq" }, "source": [ "The example searched for the term `insulin`. The top results mention diabetes and blood glucose, which are terms closely associated with insulin." ] }, { "cell_type": "markdown", "source": [ "# Workflow with stored content\n", "\n", "Next we'll re-run the same example adding in full content storage. Full content storage enables SQL queries." 
], "metadata": { "id": "BQeLRC1md6GR" } }, { "cell_type": "code", "source": [ "import json\n", "\n", "# Create tabular instance mapping input.csv fields\n", "tabular = Tabular(\"url\", [\"title\"], True)\n", "\n", "# Create workflow\n", "workflow = Workflow([Task(tabular)])\n", "\n", "# Embeddings with sentence-transformers backend\n", "embeddings = Embeddings({\"method\": \"transformers\", \"path\": \"sentence-transformers/paraphrase-mpnet-base-v2\", \"content\": True})\n", "\n", "# Index subset of CORD-19 data\n", "data = list(workflow([\"input.csv\"]))\n", "embeddings.index(data)\n", "\n", "for result in embeddings.search(\"select title, abstract, authors, doi from txtai where similar('insulin')\"):\n", " print(json.dumps(result, default=str, indent=2))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Ln61lB7QeDEq", "outputId": "ae364e17-36a5-480e-c614-c7e58d8b8462" }, "execution_count": 70, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{\n", " \"title\": \"Importance of diabetes management during the COVID-19 pandemic.\",\n", " \"abstract\": \"Uncontrolled diabetes and/or hyperglycemia is associated with severe COVID-19 disease and increased mortality. It is now known that poor glucose control before hospital admission can be associated with a high risk of in-hospital death. By achieving and maintaining glycemic control, primary care physicians (PCPs) play a critical role in limiting this potentially devastating outcome. Further, despite the hope that mass vaccination will help control the pandemic, genetic variants of the virus are causing surges in some countries. As such, PCPs will treat an increasing number of patients with diabetes who have symptoms of post-COVID-19 infection, or even have new-onset type 2 diabetes as a result of COVID-19 infection. 
However, much of the literature published focuses on the effects of COVID-19 in hospitalized patients, with few publications providing information and advice to those caring for people with diabetes in the primary care setting. This manuscript reviews the current knowledge of the risk and outcomes of individuals with diabetes who are infected with COVID-19 and provides information for PCPs on the importance of glucose control, appropriate treatment, and use of telemedicine and online prescription delivery systems to limit the potentially devastating effects of COVID-19 in people with hyperglycemia.\",\n", " \"authors\": \"Pettus, Jeremy; Skolnik, Neil\",\n", " \"doi\": \"10.1080/00325481.2021.1978704\"\n", "}\n", "{\n", " \"title\": \"Position Statement on How to Manage Patients with Diabetes and COVID-19\",\n", " \"abstract\": null,\n", " \"authors\": null,\n", " \"doi\": \"10.15605/jafes.035.01.03\"\n", "}\n", "{\n", " \"title\": \"Successful blood glucose management of a severe COVID-19 patient with diabetes: A case report\",\n", " \"abstract\": \"RATIONALE: Coronavirus disease 2019 (COVID-19) has emerged as a rapidly spreading communicable disease affecting individuals worldwide. Patients with diabetes are more vulnerable to the disease, and the mortality is higher than in those without diabetes. We reported a severe COVID-19 patient with diabetes and shared our experience with blood glucose management. PATIENT CONCERNS: A 64-year-old female diabetes patient was admitted to the intensive care unit due to productive coughing for 8 days without any obvious cause. The results of blood gas analysis indicated that the partial pressure of oxygen was 84 mm Hg with oxygen 8 L/min, and the oxygenation index was less than 200 mm Hg. In addition, postprandial blood glucose levels were abnormal (29.9 mmol/L). DIAGNOSES: The patient was diagnosed with COVID-19 (severe type) and type 2 diabetes. 
INTERVENTIONS: Comprehensive interventions including establishing a multidisciplinary team, closely monitoring her blood glucose level, an individualized diabetes diet, early activities, psychological care, etc, were performed to control blood glucose while actively treating COVID-19 infection. OUTCOMES: After the comprehensive measures, the patient's blood glucose level gradually became stable, and the patient was discharged after 20 days of hospitalization. LESSONS: This case indicated that the comprehensive measures performed by a multidisciplinary team achieved good treatment effects on a COVID-19 patient with diabetes. Targeted treatment and nursing methods should be performed based on patients\\u2019 actual situations in clinical practice.\",\n", " \"authors\": \"Hu, Rujun; Gao, Huiming; Huang, Di; Jiang, Deyu; Chen, Fang; Fu, Bao; Yuan, Xiaoli; Li, Jin; Jiang, Zhixia\",\n", " \"doi\": \"10.1097/md.0000000000020844\"\n", "}\n" ] } ] }, { "cell_type": "markdown", "source": [ "Note how the same results are returned with additional content fields." ], "metadata": { "id": "yTvn0O1v_eHT" } }, { "cell_type": "markdown", "metadata": { "id": "gzFmQDXfvniJ" }, "source": [ "# JSON Service Workflow\n", "\n", "The next example builds a workflow that runs a query against a remote URL, retrieves the results, then transforms and indexes the tabular data. This example gets the top results from the [Hacker News front page](https://news.ycombinator.com/). \n", "\n", "Below shows how to build the ServiceTask and prints the first JSON result. Details on how to configure the ServiceTask can be found in [txtai's documentation](https://neuml.github.io/txtai/workflows/)." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "bA8SihkeZqbJ", "outputId": "05962fa0-7e79-49a8-b6ac-3d535ace3fad" }, "source": [ "from txtai.workflow import ServiceTask\n", "\n", "service = ServiceTask(url=\"https://hn.algolia.com/api/v1/search\", method=\"get\", params={\"tags\": None}, batch=False, extract=\"hits\")\n", "workflow = Workflow([service])\n", "\n", "list(workflow([\"front_page\"]))[0][2]" ], "execution_count": 71, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'_highlightResult': {'author': {'matchLevel': 'none',\n", " 'matchedWords': [],\n", " 'value': 'cheesestain'},\n", " 'title': {'matchLevel': 'none',\n", " 'matchedWords': [],\n", " 'value': 'Ante: A low-level functional language'},\n", " 'url': {'matchLevel': 'none',\n", " 'matchedWords': [],\n", " 'value': 'https://antelang.org/'}},\n", " '_tags': ['story', 'author_cheesestain', 'story_31775216', 'front_page'],\n", " 'author': 'cheesestain',\n", " 'comment_text': None,\n", " 'created_at': '2022-06-17T07:39:40.000Z',\n", " 'created_at_i': 1655451580,\n", " 'num_comments': 109,\n", " 'objectID': '31775216',\n", " 'parent_id': None,\n", " 'points': 207,\n", " 'story_id': None,\n", " 'story_text': None,\n", " 'story_title': None,\n", " 'story_url': None,\n", " 'title': 'Ante: A low-level functional language',\n", " 'url': 'https://antelang.org/'}" ] }, "metadata": {}, "execution_count": 71 } ] }, { "cell_type": "markdown", "metadata": { "id": "Lv_ybw1VwK1N" }, "source": [ "Next we'll map the JSON data using the tabular pipeline. `url` will be used as the id column and `title` as the text to index." 
] }, { "cell_type": "code", "metadata": { "id": "YAbwhsaveKo1", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "15c1b28a-3c16-4d38-c586-6b8eae244dbb" }, "source": [ "from txtai.workflow import Task\n", "\n", "# Create tabular instance mapping the JSON result fields\n", "tabular = Tabular(\"url\", [\"title\"])\n", "\n", "# Recreate service applying the tabular pipeline to each result\n", "service = ServiceTask(action=tabular, url=\"https://hn.algolia.com/api/v1/search\", method=\"get\", params={\"tags\": None}, batch=False, extract=\"hits\")\n", "workflow = Workflow([service])\n", "\n", "list(workflow([\"front_page\"]))[2]" ], "execution_count": 72, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "('https://antelang.org/', 'Ante: A low-level functional language', None)" ] }, "metadata": {}, "execution_count": 72 } ] }, { "cell_type": "markdown", "metadata": { "id": "SbjMuN5lw63c" }, "source": [ "As we did previously, let's build an Embeddings index and run a search query."
] }, { "cell_type": "code", "metadata": { "id": "5lx9pa65e23E", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "99e05da7-b013-4d17-d82b-c7e13e79a563" }, "source": [ "# Embeddings with sentence-transformers backend\n", "embeddings = Embeddings({\"method\": \"transformers\", \"path\": \"sentence-transformers/paraphrase-mpnet-base-v2\"})\n", "\n", "# Index Hacker News front page\n", "data = list(workflow([\"front_page\"]))\n", "embeddings.index(data)\n", "\n", "for uid, _ in embeddings.search(\"programming\"):\n", " title = [text for url, text, _ in data if url == uid][0]\n", " print(title, uid)" ], "execution_count": 73, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Bundling binary tools in Python wheels https://simonwillison.net/2022/May/23/bundling-binary-tools-in-python-wheels/\n", "Ante: A low-level functional language https://antelang.org/\n", "Adding a Rust compiler front end to GCC [video] https://www.youtube.com/watch?v=R8Pr21nlhig\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "yq26KqfgxJ6Q" }, "source": [ "# XML Service workflow\n", "\n", "txtai's ServiceTask can consume both JSON and XML. This example runs a query against the [arXiv API](https://arxiv.org/), transforms the results and indexes them for search.\n", "\n", "Below shows how to build the ServiceTask and prints the first XML result." 
] }, { "cell_type": "code", "metadata": { "id": "K6CbS2QwltGi", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "2d9a0336-9680-4ca1-e2cd-68f6c5b4d1d3" }, "source": [ "service = ServiceTask(url=\"http://export.arxiv.org/api/query\", method=\"get\", params={\"search_query\": None, \"max_results\": 25}, batch=False, extract=[\"feed\", \"entry\"])\n", "workflow = Workflow([service])\n", "\n", "list(workflow([\"all:aliens\"]))[0][:1]" ], "execution_count": 74, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'arxiv:comment': {'#text': 'To appear in Astrophysical Journal',\n", " '@xmlns:arxiv': 'http://arxiv.org/schemas/atom'},\n", " 'arxiv:doi': {'#text': '10.3847/1538-4357/ac2369',\n", " '@xmlns:arxiv': 'http://arxiv.org/schemas/atom'},\n", " 'arxiv:primary_category': {'@scheme': 'http://arxiv.org/schemas/atom',\n", " '@term': 'q-bio.OT',\n", " '@xmlns:arxiv': 'http://arxiv.org/schemas/atom'},\n", " 'author': [{'name': 'Robin Hanson'},\n", " {'name': 'Daniel Martin'},\n", " {'name': 'Calvin McCarter'},\n", " {'name': 'Jonathan Paulson'}],\n", " 'category': [{'@scheme': 'http://arxiv.org/schemas/atom',\n", " '@term': 'q-bio.OT'},\n", " {'@scheme': 'http://arxiv.org/schemas/atom', '@term': 'physics.pop-ph'}],\n", " 'id': 'http://arxiv.org/abs/2102.01522v3',\n", " 'link': [{'@href': 'http://dx.doi.org/10.3847/1538-4357/ac2369',\n", " '@rel': 'related',\n", " '@title': 'doi'},\n", " {'@href': 'http://arxiv.org/abs/2102.01522v3',\n", " '@rel': 'alternate',\n", " '@type': 'text/html'},\n", " {'@href': 'http://arxiv.org/pdf/2102.01522v3',\n", " '@rel': 'related',\n", " '@title': 'pdf',\n", " '@type': 'application/pdf'}],\n", " 'published': '2021-02-01T18:27:12Z',\n", " 'summary': \"If life on Earth had to achieve n 'hard steps' to reach humanity's level,\\nthen the chance of this event rose as time to the n-th power. 
Integrating this\\nover habitable star formation and planet lifetime distributions predicts >99%\\nof advanced life appears after today, unless n<3 and max planet duration\\n<50Gyr. That is, we seem early. We offer this explanation: a deadline is set by\\n'loud' aliens who are born according to a hard steps power law, expand at a\\ncommon rate, change their volumes' appearances, and prevent advanced life like\\nus from appearing in their volumes. 'Quiet' aliens, in contrast, are much\\nharder to see. We fit this three-parameter model of loud aliens to data: 1)\\nbirth power from the number of hard steps seen in Earth history, 2) birth\\nconstant by assuming a inform distribution over our rank among loud alien birth\\ndates, and 3) expansion speed from our not seeing alien volumes in our sky. We\\nestimate that loud alien civilizations now control 40-50% of universe volume,\\neach will later control ~10^5 - 3x10^7 galaxies, and we could meet them in\\n~200Myr - 2Gyr. If loud aliens arise from quiet ones, a depressingly low\\ntransition chance (~10^-4) is required to expect that even one other quiet\\nalien civilization has ever been active in our galaxy. Which seems bad news for\\nSETI. But perhaps alien volume appearances are subtle, and their expansion\\nspeed lower, in which case we predict many long circular arcs to find in our\\nsky.\",\n", " 'title': 'If Loud Aliens Explain Human Earliness, Quiet Aliens Are Also Rare',\n", " 'updated': '2021-09-06T14:18:23Z'}]" ] }, "metadata": {}, "execution_count": 74 } ] }, { "cell_type": "markdown", "metadata": { "id": "aWFwkxjQyscc" }, "source": [ "Next we'll map the XML data using the tabular pipeline. `id` will be used as the id column and `title` as the text to index." 
] }, { "cell_type": "code", "metadata": { "id": "DyIetJ7OmJjP", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "7e730b22-256d-4b1b-9064-2479e26d50c9" }, "source": [ "from txtai.workflow import Task\n", "\n", "# Create tabular pipeline with new mapping\n", "tabular = Tabular(\"id\", [\"title\"])\n", "\n", "# Recreate service applying the tabular pipeline to each result\n", "service = ServiceTask(action=tabular, url=\"http://export.arxiv.org/api/query\", method=\"get\", params={\"search_query\": None, \"max_results\": 25}, batch=False, extract=[\"feed\", \"entry\"])\n", "workflow = Workflow([service])\n", "\n", "list(workflow([\"all:aliens\"]))[:1]" ], "execution_count": 75, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[('http://arxiv.org/abs/2102.01522v3',\n", " 'If Loud Aliens Explain Human Earliness, Quiet Aliens Are Also Rare',\n", " None)]" ] }, "metadata": {}, "execution_count": 75 } ] }, { "cell_type": "markdown", "metadata": { "id": "7pFnW7mCyycy" }, "source": [ "As we did previously, let's build an Embeddings index and run a search query."
] }, { "cell_type": "code", "metadata": { "id": "NX2oR5dhm_99", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "399c4316-bce6-443b-c383-aedd7ff9a1ec" }, "source": [ "# Embeddings with sentence-transformers backend\n", "embeddings = Embeddings({\"method\": \"transformers\", \"path\": \"sentence-transformers/paraphrase-mpnet-base-v2\"})\n", "\n", "# Index arXiv article titles\n", "data = list(workflow([\"all:aliens\"]))\n", "embeddings.index(data)\n", "\n", "for uid, _ in embeddings.search(\"alien radio signals\"):\n", " title = [text for url, text, _ in data if url == uid][0]\n", " print(title, uid)" ], "execution_count": 76, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Calculating the probability of detecting radio signals from alien\n", " civilizations http://arxiv.org/abs/0707.0011v2\n", "Field Trial of Alien Wavelengths on GARR Optical Network http://arxiv.org/abs/1805.04278v1\n", "Aliens on Earth. Are reports of close encounters correct? http://arxiv.org/abs/1203.6805v2\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "8xknLo2ey0vZ" }, "source": [ "# Build a workflow with no code!\n", "\n", "The next example shows how one of the same workflows above can be constructed via API configuration. This is a no-code way to build a txtai indexing workflow!"
] }, { "cell_type": "code", "metadata": { "id": "1eF5IJlzpNbw", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "4ff0bfdc-fb01-46a6-b61a-142a370dc273" }, "source": [ "%%writefile workflow.yml\n", "# Index settings\n", "writable: true\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", "\n", "# Tabular pipeline\n", "tabular:\n", " idcolumn: id\n", " textcolumns: \n", " - title\n", "\n", "# Workflow definitions\n", "workflow:\n", " index:\n", " tasks:\n", " - task: service\n", " action: tabular\n", " url: http://export.arxiv.org/api/query?max_results=25\n", " method: get\n", " params:\n", " search_query: null\n", " batch: false\n", " extract: [feed, entry]\n", " - action: upsert" ], "execution_count": 77, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing workflow.yml\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "dxi5w3IezR7Q" }, "source": [ "This workflow once again runs an arXiv query and indexes article titles. The workflow configures the same actions that were configured in Python previously. 
\n", "\n", "Let's start an API instance " ] }, { "cell_type": "code", "metadata": { "id": "B1DQyB5ErIzr", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "7e170445-bc93-4660-b944-8b10b8b02d98" }, "source": [ "!killall -9 uvicorn\n", "!CONFIG=workflow.yml nohup uvicorn \"txtai.api:app\" &> api.log &\n", "!sleep 30\n", "!cat api.log" ], "execution_count": 78, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "INFO: Started server process [754]\n", "2022-06-17 15:05:58,554 [INFO] serve: Started server process [754]\n", "INFO: Waiting for application startup.\n", "2022-06-17 15:05:58,554 [INFO] startup: Waiting for application startup.\n", "INFO: Application startup complete.\n", "2022-06-17 15:06:07,707 [INFO] startup: Application startup complete.\n", "INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)\n", "2022-06-17 15:06:07,707 [INFO] _log_started_message: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "45JaR7Nr0Zmg" }, "source": [ "Next we'll execute the workflow. txtai has API bindings for [JavaScript](https://github.com/neuml/txtai.js), [Java](https://github.com/neuml/txtai.java), [Rust](https://github.com/neuml/txtai.rs) and [Golang](https://github.com/neuml/txtai.go). But to keep things simple, we'll just run the commands via cURL. 
" ] }, { "cell_type": "code", "metadata": { "id": "xt_qL6eA0SrS", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "3ca70a31-cf48-42b4-949f-f01aac14505c" }, "source": [ "# Execute workflow via API call\n", "!curl -X POST \"http://localhost:8000/workflow\" -H \"accept: application/json\" -H \"Content-Type: application/json\" -d \"{\\\"name\\\":\\\"index\\\",\\\"elements\\\":[\\\"all:aliens\\\"]}\"" ], "execution_count": 79, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[[\"http://arxiv.org/abs/2102.01522v3\",\"If Loud Aliens Explain Human Earliness, Quiet Aliens Are Also Rare\",null],[\"http://arxiv.org/abs/cs/0306071v1\",\"AliEnFS - a Linux File System for the AliEn Grid Services\",null],[\"http://arxiv.org/abs/physics/0306103v1\",\"AliEn - EDG Interoperability in ALICE\",null],[\"http://arxiv.org/abs/2103.05559v1\",\"Oumuamua Is Not a Probe Sent to our Solar System by an Alien\\n Civilization\",null],[\"http://arxiv.org/abs/1403.3979v1\",\"Robust transitivity and density of periodic points of partially\\n hyperbolic diffeomorphisms\",null],[\"http://arxiv.org/abs/1712.09210v1\",\"Sampling alien species inside and outside protected areas: does it\\n matter?\",null],[\"http://arxiv.org/abs/cs/0306067v1\",\"The AliEn system, status and perspectives\",null],[\"http://arxiv.org/abs/0707.0011v2\",\"Calculating the probability of detecting radio signals from alien\\n civilizations\",null],[\"http://arxiv.org/abs/1805.04278v1\",\"Field Trial of Alien Wavelengths on GARR Optical Network\",null],[\"http://arxiv.org/abs/1808.00529v1\",\"Open Category Detection with PAC Guarantees\",null],[\"http://arxiv.org/abs/1206.3640v1\",\"The Study of Climate on Alien Worlds\",null],[\"http://arxiv.org/abs/1203.6805v2\",\"Aliens on Earth. 
Are reports of close encounters correct?\",null],[\"http://arxiv.org/abs/1604.05078v1\",\"The Imprecise Search for Habitability\",null],[\"http://arxiv.org/abs/1006.2613v1\",\"Resurgence, Stokes phenomenon and alien derivatives for level-one linear\\n differential systems\",null],[\"http://arxiv.org/abs/1307.0653v1\",\"General and alien solutions of a functional equation and of a functional\\n inequality\",null],[\"http://arxiv.org/abs/1705.03394v1\",\"That is not dead which can eternal lie: the aestivation hypothesis for\\n resolving Fermi's paradox\",null],[\"http://arxiv.org/abs/1701.02294v1\",\"Alien Calculus and non perturbative effects in Quantum Field Theory\",null],[\"http://arxiv.org/abs/1801.06180v1\",\"Are Alien Civilizations Technologically Advanced?\",null],[\"http://arxiv.org/abs/1902.05387v1\",\"Simultaneous x, y Pixel Estimation and Feature Extraction for Multiple\\n Small Objects in a Scene: A Description of the ALIEN Network\",null],[\"http://arxiv.org/abs/0711.4034v1\",\"The q-analogue of the wild fundamental group (II)\",null],[\"http://arxiv.org/abs/2111.07895v1\",\"Research Programs Arising from 'Oumuamua Considered as an Alien Craft\",null],[\"http://arxiv.org/abs/2112.15226v1\",\"Variations on the Resurgence of the Gamma Function\",null],[\"http://arxiv.org/abs/astro-ph/0501119v1\",\"Expanding advanced civilizations in the universe\",null],[\"http://arxiv.org/abs/cs/0306068v1\",\"AliEn Resource Brokers\",null],[\"http://arxiv.org/abs/hep-ph/9403231v2\",\"The Renormalization of Composite Operators in Yang-Mills Theories Using\\n General Covariant Gauge\",null]]" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "_bwn4KBt1Cos" }, "source": [ "The data is now indexed. Note that the index configuration has an `upsert` action. Each workflow call will insert new rows or update existing rows. This call could be scheduled with a system cron to execute periodically and build an index of arXiv article titles. 
\n", "\n", "Now that the index is ready, let's run a search." ] }, { "cell_type": "code", "metadata": { "id": "qbteIueJ1Fds", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "ad8ab84b-36a3-4346-d387-64321b933d25" }, "source": [ "# Run a search\n", "!curl -X GET \"http://localhost:8000/search?query=radio&limit=3\" -H \"accept: application/json\"" ], "execution_count": 80, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[{\"id\":\"http://arxiv.org/abs/0707.0011v2\",\"score\":0.40350058674812317},{\"id\":\"http://arxiv.org/abs/1805.04278v1\",\"score\":0.3406212031841278},{\"id\":\"http://arxiv.org/abs/1902.05387v1\",\"score\":0.22262491285800934}]" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "C5_Tt6EA3Cxb" }, "source": [ "# Add a translation step to workflow\n", "\n", "Next we'll recreate the workflow, adding one additional step, translating the text into French before indexing. This workflow runs an arXiv query, translates the results and builds a semantic index of titles in French. 
" ] }, { "cell_type": "code", "metadata": { "id": "j8rBVl17293q", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "215dc2cf-3a57-4deb-820d-e8ac03e3e041" }, "source": [ "%%writefile workflow.yml\n", "# Index settings\n", "writable: true\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", "\n", "# Tabular pipeline\n", "tabular:\n", " idcolumn: id\n", " textcolumns: \n", " - title\n", "\n", "# Translation pipeline\n", "translation:\n", "\n", "# Workflow definitions\n", "workflow:\n", " index:\n", " tasks:\n", " - task: service\n", " action: tabular\n", " url: http://export.arxiv.org/api/query?max_results=25\n", " method: get\n", " params:\n", " search_query: null\n", " batch: false\n", " extract: [feed, entry]\n", " - action: translation\n", " args: [fr]\n", " - action: upsert" ], "execution_count": 81, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Overwriting workflow.yml\n" ] } ] }, { "cell_type": "code", "metadata": { "id": "UQWvvgb2CwgG", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "3b0c99d7-b5f6-4c81-eb6e-ac9055eaaae4" }, "source": [ "!killall -9 uvicorn\n", "!CONFIG=workflow.yml nohup uvicorn \"txtai.api:app\" &> api.log &\n", "!sleep 30\n", "!cat api.log" ], "execution_count": 82, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "INFO: Started server process [775]\n", "2022-06-17 15:06:29,397 [INFO] serve: Started server process [775]\n", "INFO: Waiting for application startup.\n", "2022-06-17 15:06:29,397 [INFO] startup: Waiting for application startup.\n", "INFO: Application startup complete.\n", "2022-06-17 15:06:40,198 [INFO] startup: Application startup complete.\n", "INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)\n", "2022-06-17 15:06:40,199 [INFO] _log_started_message: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "v1y4heYx679i" }, "source": [ "Same as 
before, we'll run the index workflow and a search" ] }, { "cell_type": "code", "metadata": { "id": "npW3rjCw6_nx", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "fba0bd30-2340-4197-d68c-4fc925ce80b8" }, "source": [ "# Execute workflow via API call\n", "!curl -s -X POST \"http://localhost:8000/workflow\" -H \"accept: application/json\" -H \"Content-Type: application/json\" -d \"{\\\"name\\\":\\\"index\\\",\\\"elements\\\":[\\\"all:aliens\\\"]}\" > /dev/null\n", "\n", "# Run a search\n", "!curl -X GET \"http://localhost:8000/search?query=radio&limit=3\" -H \"accept: application/json\"" ], "execution_count": 83, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[{\"id\":\"http://arxiv.org/abs/0707.0011v2\",\"score\":0.532800555229187},{\"id\":\"http://arxiv.org/abs/0711.4034v1\",\"score\":0.24413327872753143},{\"id\":\"http://arxiv.org/abs/2102.01522v3\",\"score\":0.22881504893302917}]" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "OsQO5DG9zBQF" }, "source": [ "# Run YAML workflow in Python\n", "\n", "Workflow YAML files can also be directly executed in Python. In this case, all input data is passed locally in Python and not through network interfaces. The following section shows how to do this!" 
] }, { "cell_type": "code", "metadata": { "id": "iXmkLDTlzPT3", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "6b795963-2954-42e2-c42d-39c3f88c9ac3" }, "source": [ "import yaml\n", "\n", "from txtai.app import Application\n", "\n", "with open(\"workflow.yml\") as config:\n", " workflow = yaml.safe_load(config)\n", "\n", "app = Application(workflow)\n", "\n", "# Run the workflow\n", "data = list(app.workflow(\"index\", [\"all:aliens\"]))\n", "\n", "# Run a search\n", "for result in app.search(\"radio\", None):\n", " text = [row[1] for row in data if row[0] == result[\"id\"]][0]\n", " print(result[\"id\"], result[\"score\"], text)" ], "execution_count": 84, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "http://arxiv.org/abs/0707.0011v2 0.532800555229187 Calcul de la probabilité de détection des signaux radio de l'étrangercivilisations\n", "http://arxiv.org/abs/0711.4034v1 0.24413327872753143 Le q-analogue du groupe fondamental sauvage (II)\n", "http://arxiv.org/abs/2102.01522v3 0.22881504893302917 Si les étrangers louds expliquent le début de l'humanité, les étrangers tranquilles sont aussi rares\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "EoQFEi_61P9O" }, "source": [ "# Wrapping up\n", "\n", "This notebook demonstrated how to transform, index and search tabular data from a variety of sources. txtai offers maximum flexibility in building composable workflows to maximize the number of ways data can be indexed for semantic search. 
" ] } ] } ================================================ FILE: examples/23_Tensor_workflows.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "accelerator": "GPU", "colab": { "name": "23 - Tensor workflows", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Tensor workflows\n", "\n", "Many of the examples and use cases for txtai focus on transforming text. Makes sense as txt is even in the name! But that doesn't mean txtai only works with text.\n", "\n", "This notebook will cover examples of how to efficiently process tensors using txtai workflows." ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. We will install the api, pipeline and workflow optional extras packages, along with the datasets package. " ] }, { "cell_type": "code", "metadata": { "id": "XMQuuun2R06J" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[api,pipeline,workflow] datasets" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "NSYrP0hjtR_E" }, "source": [ "# Transform large tensor arrays\n", "\n", "The first section attempts to apply a simple transform to a very large memory-mapped array (2,000,000 x 1024)." 
] }, { "cell_type": "code", "metadata": { "id": "BoPJIKWoTibk", "colab": { "base_uri": "https://localhost:8080/", "height": 220 }, "outputId": "143a6e4d-fe56-4353-e8ee-595ddfc12249" }, "source": [ "import numpy as np\n", "import torch\n", "\n", "# Generate large memory-mapped array\n", "rows, cols = 2000000, 1024\n", "data = np.memmap(\"data.npy\", dtype=np.float32, mode=\"w+\", shape=(rows, cols))\n", "del data\n", "\n", "# Open memory-mapped array\n", "data = np.memmap(\"data.npy\", dtype=np.float32, shape=(rows, cols))\n", "\n", "# Create tensor\n", "tensor = torch.from_numpy(data).to(\"cuda:0\")\n", "\n", "# Apply tanh transform to tensor\n", "torch.tanh(tensor).shape" ], "execution_count": null, "outputs": [ { "output_type": "error", "ename": "RuntimeError", "evalue": "ignored", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;31m# Apply tanh transform to tensor\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtanh\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mRuntimeError\u001b[0m: CUDA out of memory. Tried to allocate 7.63 GiB (GPU 0; 11.17 GiB total capacity; 7.63 GiB already allocated; 3.04 GiB free; 7.63 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. 
See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "O8mKzPP01d_m", "outputId": "929226a8-6948-4d17-ab70-025da2081abd" }, "source": [ "!ls -l --block-size=MB data.npy" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "-rw-r--r-- 1 root root 8192MB Dec 6 23:24 data.npy\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "vuObmAJ9FaJe" }, "source": [ "Not surprisingly this runs out of CUDA memory. The array needs `2,000,000 * 1024 * 4 = 8GB` which exceeds the amount of GPU memory available.\n", "\n", "One of the great things about NumPy and PyTorch arrays is that they can be sliced without having to copy data. Additionally, PyTorch has methods to work directly on NumPy arrays without copying data, in other words both NumPy arrays and PyTorch arrays can share the same memory. This opens the door to efficient processing of tensor data in place. \n", "\n", "Let's try applying a simple tanh transform in batches over the array." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ciD6unQYD-bJ", "outputId": "d3b6a0c5-aea5-451d-d3e3-60d04ef33a9e" }, "source": [ "def process(x):\n", " print(x.shape)\n", " return torch.tanh(torch.from_numpy(x).to(\"cuda:0\")).cpu().numpy()\n", "\n", "# Split into 250,000 rows per call\n", "batch = 250000\n", "count = 0\n", "for x in range(0, len(data), batch):\n", " for row in process(data[x : x + batch]):\n", " count += 1\n", "\n", "print(count)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "(250000, 1024)\n", "(250000, 1024)\n", "(250000, 1024)\n", "(250000, 1024)\n", "(250000, 1024)\n", "(250000, 1024)\n", "(250000, 1024)\n", "(250000, 1024)\n", "2000000\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "uZBzEMsRHpsi" }, "source": [ "Iterating over the data array and selecting slices to operate on allows the transform to complete successfully! Each `torch.from_numpy` call is building a view of a portion of the existing large NumPy data array. " ] }, { "cell_type": "markdown", "metadata": { "id": "Oe7X17vbHJRV" }, "source": [ "# Enter workflows\n", "\n", "The next section takes the same array and shows how workflows can apply transformations to tensors. 
" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ymqr92kW9hxd", "outputId": "e4a4c7d1-be54-46c2-bc7c-dd849adfeb7e" }, "source": [ "from txtai.workflow import Task, Workflow\n", "\n", "# Create workflow with a single task calling process for each batch\n", "task = Task(process)\n", "workflow = Workflow([task], batch)\n", "\n", "# Run workflow\n", "count = 0\n", "for row in workflow(data):\n", " count += 1\n", "\n", "print(count)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "(250000, 1024)\n", "(250000, 1024)\n", "(250000, 1024)\n", "(250000, 1024)\n", "(250000, 1024)\n", "(250000, 1024)\n", "(250000, 1024)\n", "(250000, 1024)\n", "2000000\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "B9qC8qUbHjfk" }, "source": [ "Workflows process the data in the same fashion as the code in the previous section. On top of that, workflows can handle text, images, video, audio, document, tensors and more. Workflow graphs can also be connected together to handle complex use cases." ] }, { "cell_type": "markdown", "metadata": { "id": "wCRD9ERoJvsG" }, "source": [ "# Workflows with PyTorch models\n", "\n", "The next example applies a PyTorch model to the same data. The model applies a series of transforms and outputs a single float per row." 
] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "N9UTfSTTIDaO", "outputId": "000c7e13-a249-43d0-f419-28ddd62e8ba1" }, "source": [ "from torch import nn\n", "\n", "class Model(nn.Module):\n", " def __init__(self):\n", " super().__init__()\n", "\n", " self.gelu = nn.ReLU()\n", " self.linear1 = nn.Linear(1024, 512)\n", " self.dropout = nn.Dropout(0.5)\n", " self.norm = nn.LayerNorm(512)\n", " self.linear2 = nn.Linear(512, 1)\n", "\n", " def forward(self, inputs):\n", " outputs = self.gelu(inputs)\n", " outputs = self.linear1(outputs)\n", " outputs = self.dropout(outputs)\n", " outputs = self.norm(outputs)\n", " outputs = self.linear2(outputs)\n", "\n", " return outputs\n", "\n", "model = Model().to(\"cuda:0\")\n", "\n", "def process(x):\n", " with torch.no_grad():\n", " outputs = model(torch.from_numpy(x).to(\"cuda:0\")).cpu().numpy()\n", " print(outputs.shape)\n", " return outputs\n", "\n", "# Create workflow with a single task calling model for each batch\n", "task = Task(process)\n", "workflow = Workflow([task], batch)\n", "\n", "# Run workflow\n", "count = 0\n", "for row in workflow(data):\n", " count += 1\n", "\n", "print(count)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "(250000, 1)\n", "(250000, 1)\n", "(250000, 1)\n", "(250000, 1)\n", "(250000, 1)\n", "(250000, 1)\n", "(250000, 1)\n", "(250000, 1)\n", "2000000\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "Q1ivX4eBuU8T" }, "source": [ "Once again the data can be processed in batches using workflows, even with a more complex model. Let's try a more interesting example." ] }, { "cell_type": "markdown", "metadata": { "id": "KoSB1mKzUnb0" }, "source": [ "# Workflows in parallel\n", "\n", "Workflows consist of a series of tasks. Each task can output one to many outputs per input element. 
Multi-output tasks have options available to [merge the data](https://neuml.github.io/txtai/workflow/task/#multi-action-task-merges) for downstream tasks.\n", "\n", "The following example builds a workflow with a task having three separate actions. Each action takes text as an input and applies a sentiment classifier. This is followed by a task that merges the three outputs for each row using a mean transform. Essentially, this workflow builds a weighted sentiment classifier using the outputs of three models. " ] }, { "cell_type": "code", "metadata": { "id": "JlCdVgo_LXOl" }, "source": [ "import time\n", "\n", "from datasets import load_dataset\n", "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n", "\n", "class Tokens:\n", " def __init__(self, texts):\n", " tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased-finetuned-sst-2-english\")\n", " tokens = tokenizer(texts, padding=True, return_tensors=\"pt\").to(\"cuda:0\")\n", "\n", " self.inputs, self.attention = tokens[\"input_ids\"], tokens[\"attention_mask\"]\n", "\n", " def __len__(self):\n", " return len(self.inputs)\n", "\n", " def __getitem__(self, value):\n", " return (self.inputs[value], self.attention[value])\n", "\n", "class Classify:\n", " def __init__(self, model):\n", " self.model = model\n", "\n", " def __call__(self, tokens):\n", " with torch.no_grad():\n", " inputs, attention = tokens\n", " outputs = self.model(input_ids=inputs, attention_mask=attention)\n", " outputs = outputs[\"logits\"]\n", "\n", " return outputs\n", "\n", "# Load reviews from the rotten tomatoes dataset\n", "ds = load_dataset(\"rotten_tomatoes\")\n", "texts = ds[\"train\"][\"text\"]\n", "\n", "tokens = Tokens(texts)\n", "\n", "model1 = AutoModelForSequenceClassification.from_pretrained(\"M-FAC/bert-tiny-finetuned-sst2\")\n", "model1 = model1.to(\"cuda:0\")\n", "\n", "model2 = AutoModelForSequenceClassification.from_pretrained(\"howey/electra-base-sst2\")\n", "model2 = 
model2.to(\"cuda:0\")\n", "\n", "model3 = AutoModelForSequenceClassification.from_pretrained(\"philschmid/MiniLM-L6-H384-uncased-sst2\")\n", "model3 = model3.to(\"cuda:0\")\n", "\n", "task1 = Task([Classify(model1), Classify(model2), Classify(model3)])\n", "task2 = Task([lambda x: torch.sigmoid(x).mean(axis=1).cpu().numpy()])\n", "\n", "workflow = Workflow([task1, task2], 250)\n", "\n", "start = time.time()\n", "for x in workflow(tokens):\n", " pass\n", "\n", "print(f\"Took {time.time() - start} seconds\")" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Using custom data configuration default\n", "Reusing dataset rotten_tomatoes_movie_review (/root/.cache/huggingface/datasets/rotten_tomatoes_movie_review/default/1.0.0/40d411e45a6ce3484deed7cc15b82a53dad9a72aafd9f86f8f227134bec5ca46)\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Took 84.73194456100464 seconds\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "eBQzLrtAVUtB" }, "source": [ "Note that while the task actions are parallel, that doesn't necessarily mean the operations are concurrent. In the case above, the actions are executed sequentially.\n", "\n", "Workflows have an additional option to run task actions concurrently. The two supported modes are \"thread\" and \"process\". I/O bound actions will do better with multithreading and CPU bound actions will do better with multiprocessing. More can be read in the [txtai documentation](https://neuml.github.io/txtai/workflow/task/#multi-action-task-concurrency). 
" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AB0onoOlVT-e", "outputId": "a072d8a6-3b3b-4066-8881-8af9a8b96608" }, "source": [ "task1 = Task([Classify(model1), Classify(model2), Classify(model3)], concurrency=\"thread\")\n", "task2 = Task([lambda x: torch.sigmoid(x).mean(axis=1).cpu().numpy()])\n", "\n", "workflow = Workflow([task1, task2], 250)\n", "\n", "start = time.time()\n", "for x in workflow(tokens):\n", " pass\n", "\n", "print(f\"Took {time.time() - start} seconds\")" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Took 85.21102929115295 seconds\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "5s2KexhG_udx" }, "source": [ "In this case, concurrency doesn't improve performance. While the [GIL](https://wiki.python.org/moin/GlobalInterpreterLock) is a factor, a bigger factor is that the GPU is already fully loaded. This method would be more beneficial if the system had a second GPU or the primary GPU had idle cycles. " ] }, { "cell_type": "markdown", "metadata": { "id": "EoQFEi_61P9O" }, "source": [ "# Wrapping up\n", "\n", "This notebook introduced a number of different ways to work with large-scale tensor data and process it efficiently. This notebook purposely didn't cover embeddings and pipelines to demonstrate how workflows can stand on their own. In addition to workflows, this notebook covered efficient methods to work with large tensor arrays in PyTorch and NumPy." 
] } ] } ================================================ FILE: examples/24_Whats_new_in_txtai_4_0.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "kernelspec": { "name": "python3", "display_name": "Python 3", "language": "python" }, "language_info": { "name": "python", "version": "3.7.6", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "colab": { "name": "24 - Whats new in txtai 4.0", "provenance": [], "collapsed_sections": [] } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "POWZoSJR6XzK" }, "source": [ "# 💡 What's new in txtai 4.0\n", "\n", "txtai 4.0 brings a number of major feature enhancements, most importantly the capability to store full document content and text right in txtai. This notebook will cover all the changes with examples." ] }, { "cell_type": "markdown", "metadata": { "id": "qa_PPKVX6XzN" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ] }, { "cell_type": "code", "metadata": { "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "trusted": true, "_kg_hide-output": true, "id": "24q-1n5i6XzQ" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai" ], "execution_count": 23, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Content storage\n", "Up to now with txtai, once text was vectorized, it was no longer possible to trace back to the input text. Only document ids and vectors were stored. Results consisted of ids and scores. It was the responsibility of the developer to resolve matches with an external data store. \n", "\n", "txtai 4.0 brings a major paradigm shift. Content can now be stored alongside embeddings vectors. 
This opens up a number of exciting possibilities with txtai!\n", "\n", "Let's see with the classic txtai example below." ], "metadata": { "id": "0p3WCDniUths" } }, { "cell_type": "code", "metadata": { "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a", "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", "trusted": true, "id": "2j_CFGDR6Xzp", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "641a6b9e-dfe4-4c77-94d2-00f87c84b8f5" }, "source": [ "from txtai.embeddings import Embeddings\n", "\n", "data = [\"US tops 5 million confirmed virus cases\",\n", " \"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\",\n", " \"Beijing mobilises invasion craft along coast as Taiwan tensions escalate\",\n", " \"The National Park Service warns against sacrificing slower friends in a bear attack\",\n", " \"Maine man wins $1M from $25 lottery ticket\",\n", " \"Make huge profits without work, earn up to $100,000 a day\"]\n", "\n", "# Create embeddings index with content enabled. 
The default behavior is to only store indexed vectors.\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/nli-mpnet-base-v2\", \"content\": True, \"objects\": True})\n", "\n", "# Create an index for the list of text\n", "embeddings.index([(uid, text, None) for uid, text in enumerate(data)])\n", "\n", "print(\"%-20s %s\" % (\"Query\", \"Best Match\"))\n", "print(\"-\" * 50)\n", "\n", "# Run an embeddings search for each query\n", "for query in (\"feel good story\", \"climate change\", \"public health story\", \"war\", \"wildlife\", \"asia\", \"lucky\", \"dishonest junk\"):\n", " # Extract text field from result\n", " text = embeddings.search(query, 1)[0][\"text\"]\n", "\n", " # Print text\n", " print(\"%-20s %s\" % (query, text))" ], "execution_count": 24, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Query Best Match\n", "--------------------------------------------------\n", "feel good story Maine man wins $1M from $25 lottery ticket\n", "climate change Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\n", "public health story US tops 5 million confirmed virus cases\n", "war Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n", "wildlife The National Park Service warns against sacrificing slower friends in a bear attack\n", "asia Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n", "lucky Maine man wins $1M from $25 lottery ticket\n", "dishonest junk Make huge profits without work, earn up to $100,000 a day\n" ] } ] }, { "cell_type": "markdown", "source": [ "The only change above is setting the *content* flag to True. This enables storing text and metadata content (if provided) alongside the index. Note how the text is pulled right from the query result!" 
], "metadata": { "id": "hHGvhZm-ZTzL" } }, { "cell_type": "markdown", "source": [ "# Query with SQL\n", "\n", "When content is enabled, the entire dictionary will be stored and can be queried. In addition to similarity queries, txtai accepts SQL queries. This enables combined queries using both a similarity index and content stored in a database backend." ], "metadata": { "id": "BYWUFBUGyKyY" } }, { "cell_type": "code", "source": [ "# Create an index for the list of text\n", "embeddings.index([(uid, {\"text\": text, \"length\": len(text)}, None) for uid, text in enumerate(data)])\n", "\n", "# Filter by score\n", "print(embeddings.search(\"select text, score from txtai where similar('hiking danger') and score >= 0.15\"))\n", "\n", "# Filter by metadata field 'length'\n", "print(embeddings.search(\"select text, length, score from txtai where similar('feel good story') and score >= 0.05 and length >= 40\"))\n", "\n", "# Run aggregate queries\n", "print(embeddings.search(\"select count(*), min(length), max(length), sum(length) from txtai\"))\n", "print()\n", "for x in embeddings.search(\"select count(*), min(length), max(length), sum(length), text, score from txtai group by text limit 10\"):\n", " print(x)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "aPH-dnV2ZuL1", "outputId": "f5e45e94-15c1-4635-8050-66c17c572dbb" }, "execution_count": 25, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[{'text': 'The National Park Service warns against sacrificing slower friends in a bear attack', 'score': 0.3151373267173767}]\n", "[{'text': 'Maine man wins $1M from $25 lottery ticket', 'length': 42, 'score': 0.08329004049301147}]\n", "[{'count(*)': 6, 'min(length)': 39, 'max(length)': 94, 'sum(length)': 387}]\n", "\n", "{'count(*)': 1, 'min(length)': 72, 'max(length)': 72, 'sum(length)': 72, 'text': 'Beijing mobilises invasion craft along coast as Taiwan tensions escalate', 'score': None}\n", "{'count(*)': 1, 'min(length)': 94, 
'max(length)': 94, 'sum(length)': 94, 'text': \"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\", 'score': None}\n", "{'count(*)': 1, 'min(length)': 42, 'max(length)': 42, 'sum(length)': 42, 'text': 'Maine man wins $1M from $25 lottery ticket', 'score': None}\n", "{'count(*)': 1, 'min(length)': 57, 'max(length)': 57, 'sum(length)': 57, 'text': 'Make huge profits without work, earn up to $100,000 a day', 'score': None}\n", "{'count(*)': 1, 'min(length)': 83, 'max(length)': 83, 'sum(length)': 83, 'text': 'The National Park Service warns against sacrificing slower friends in a bear attack', 'score': None}\n", "{'count(*)': 1, 'min(length)': 39, 'max(length)': 39, 'sum(length)': 39, 'text': 'US tops 5 million confirmed virus cases', 'score': None}\n" ] } ] }, { "cell_type": "markdown", "source": [ "The example above adds a simple additional field, text length. Starting with txtai 4.0, the index method accepts dictionaries in the data field. \n", "\n", "Note the second query is filtering on the metadata field length along with a similarity query clause. This gives a great blend of similarity search with traditional filtering to help identify the best results." ], "metadata": { "id": "oH4Yd9BOlo5u" } }, { "cell_type": "markdown", "source": [ "# Object storage\n", "\n", "In addition to metadata, binary content can also be associated with documents. The example below downloads an image and upserts it along with associated text into the embeddings index." 
], "metadata": { "id": "lGmiYXyqyjtQ" } }, { "cell_type": "code", "source": [ "import urllib\n", "\n", "from IPython.display import Image\n", "\n", "# Get an image\n", "request = urllib.request.urlopen(\"https://raw.githubusercontent.com/neuml/txtai/master/demo.gif\")\n", "\n", "# Upsert new record having both text and an object\n", "embeddings.upsert([(\"txtai\", {\"text\": \"txtai executes machine-learning workflows to transform data and build AI-powered semantic search applications.\", \"object\": request.read()}, None)])\n", "\n", "# Query txtai for the most similar result to \"machine learning\" and get associated object\n", "result = embeddings.search(\"select object from txtai where similar('machine learning') limit 1\")[0][\"object\"]\n", "\n", "# Display image\n", "Image(result.getvalue(), width=600)" ], "metadata": { "id": "Ef4-Gd8ZtzUF", "colab": { "base_uri": "https://localhost:8080/", "height": 420 }, "outputId": "a633fb4d-ff88-4a8f-ca15-673e9f8dec30" }, "execution_count": 26, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" ], "image/png": 
"R0lGODlhmwQYA/UAABITFMzMzBMUFb29vYKDg8DAwLGxsY2OjltcXLu7u4SEhcvLy8fHx8TExJWWlqWlpaenpyYnJ6Kjo6qrqzk5OkdHSD4/PywtLri4uCAhIq2urhobHJ6en1ZWVyorLEtMTJCRkZqbm3Fxcnd3eGRlZpaWlxYXGIqKimJiY15eX1dYWWNkZH5+f1BRUW5ub2pqa19gYLa2tnR1dYeHh1RUVX9/gGdnaKOjpDs8PLS0tHp6epKTk5iYmZ+fn7i4uSEiIiH/C05FVFNDQVBFMi4wAwEAAAAh+QQEFAD/ACwAAAAAmwQYAwAG/0CAcEgsGo/IpHLJbDqf0Kh0Sq1ar9isdsvter/gsHhMLpvP6LR6zW673/C4fE6v2+/4vH7P7/v/gIGCg4SFhoeIiYqLjI2Oj5CRkpOUlZaXmJmam5ydnp+goaKjpKWmp6ipqqusra6vsLGys7S1tre4ubq7vL2+v8DBwsPExcbHyMnKy8zNzs/Q0dLT1NXW19jZ2tvc3d7f4OHi4+Tl5uforAHr7O3u6fDx8vP0x+737fX6+/z9/qP4Av4bSLCgwYN/AuJDyLChw4cQtyi8F7GixYsY63BYcKERhXVODgSwQGbiu4woU6pcqaVCgAOOPhYQ0OSjBCs7TL7UyY6lz/+fQIMKmTAy5gIGTxws+FAlp0kQPEEKnUq1KkKXDx7JfPJhQQiaVDKIzWBhHcmxG6IGsMq2rVt5IklolQp2SYEAHbHIpFBE7du/gANzY4A3iQUQGI5O0MHXyAelDDAo8OC4BtHIJxobOdpxRA7MlIkcTlyAQwoTYLe2kNCgwIEIdYsoCOAiy97YfgXr3s1bWdcYSTocVbwOhJET+M4KKYtPxZGjFi6zq0BkRPMhMle4M4D6SIoAN2ybxR21t/nz6HdZN45kwEuxACKkkCEb+gYBEZAnuL88RAfYQoiwTl5EKFaADRd4cBh1Aa4jA4AUgGADduzAgNp3ATh3xFZ6jVf/YHnphSjiiKpsVNuGdCERgX1EQJDhEiEsMGFfHj5HW2xF4OBgXVCdgOMQNVZx24c8kWjkkUhuYsCLR6xYWBI2gIejC+8pISABmy2gwI9CqJAiik82eFN7TFoxJJAgJqnmmmwmchcNSSxpAAwE1ocBB3jmuWQPRbzA2nDFZdkClwDUUKVhX7YgJZmnXXGmELm1KemklOZBGFNcUpBAOyBgOkQPgN4DAXYxrHMnnqXCRKNmx720BIdDKDoBoRqUKWSQAERa6a689opGqRoGN8OSR4nQooxMKKUBq1Sy92Gd9WGpBKxdLnpEYsHeGiakafrq7bfgYkGUXDVBBiAAPRKK/yarhR66bhKGfjVtotYK2uG2uXYb7r789nuESCxA8eYQL9QLZndDLKnqs8F9aQS1AMjKpZPQTvFovkX6q/HG+6LglcBFCeHBUQg0KfKARCjqLrcVf3giEhBLfPC97OrK8c04SwoxqQgAmN+X1iErsoCeEiVBaF7e5ey7UDoIoQIzAhDzAhogcaV4+Nqc89Zck1grgzm2c9c6g9aXHIXu3HD1qksE7Q65UtNb9bW2aluzvl3nrTdvBc/AJQ3D3hVDZjAjFhkENSjXn+DGNptlyzkeQFoPKPAXdwNFyBz2TFjfnfHeoIcOWGSWd0LlCOpmobXorLf+k3WVf7IpfGGs7v/67bhfVAAGqVPi8cu145378MQz9IEIkFuCAPDBf17889BHP4bt0ldv/fVOUI/99txjr3334Ic//Pfil2/++einr/767Lfv/vvwxy///PTXb//9+Oev//789+///wAMoAAHSMACGvCACEygAhfIwAY68IEQjKAEJ0jBClrwghjMoAY3yMEOevCDIAyhCEdIwhKa8IQoTKEKV8jCFrrwhTCMoQxnSMMa2vCGOMyhDnfIwx768IdADKIQh0jEIhrxiEhMohKXyMQmOvGJUIyiFKdIxSpa8YpYzKIWt8jFLnrxi2AMoxjHSMY
ymvGMaEyjGtfIxja68Y1wjKMc50jHOtr/8Y54zKMe98jHPvrxj4AMpCAHSchCGvKQiEykIhfJyEY68pGQjKQkJ0nJSlrykpjMpCY3yclOevKToAylKEdJylKa8pSoTKUqV8nKVrrylbCMpSxnScta2vKWuMylLnfJy1768pfADKYwh0nMYhrzmMhMpjKXycxmOvOZ0IymNKdJzWpa85rYzKY2t8nNbnrzm+AMpzjHSc5ymvOc6EynOtfJzna6853wjKc850nPetrznvjMpz73yc9++vOfAA2oQAdK0IIa9KAITahCF8rQhjr0oRCNqEQnStGKWvSiGM2oRjfK0Y569KMgDalIR0rSkpr0pChNqUpXytKWuvSl/zCNqUxnStOa2vSmOM2pTnfK05769KdADapQh0rUohr1qEhNqlKXytSmOvWpUI2qVKdK1apa9apYzapWt8rVrnr1q2ANq1jHStaymvWsaE2rWtfK1ra69a1wjatc50rXutr1rnjNq173yte++vWvgA2sYAdL2MIa9rCITaxiF8vYxjr2sZCNrGQnS9nKWvaymM2sZjfL2c569rOgDa1oR0va0pr2tKhNrWpXy9rWuva1sI2tbGdL29ra9ra4za1ud8vb3vr2t8ANrnCHS9ziGve4yE2ucpfL3OY697nQja50p0vd6lr3utjNrna3y93ueve74A2veMdL3vKa97zoTf+vetfL3va6973wja9850vf+tr3vvjNr373y9/++ve/AA6wgAdM4AIb+MAITrCCF8zgBjv4wRCOsIQnTOEKW/jCGM6whjfM4Q57+MMgDrGIR0ziEpv4xChOsYpXzOIWu/jFMI6xjGdM4xrb+MY4zrGOd8zjHvv4x0AOspCHTOQiG/nISE6ykpfM5CY7+clQjrKUp0zlKlv5yljOspa3zOUue/nLYA6zmMdM5jKb+cxoTrOa18zmNrv5zXCOs5znTOc62/nOeM6znvfM5z77+c+ADrSgB03oQhv60IhOtKIXzehGO/rRkI60pCdN6Upb+tKYzrSmN83pTnv606AOtaj/R03qUpv61KhOtapXzepWu/rVsI61rGdN61rb+ta4zrWud83rXvv618AOtrCHTexiG/vYyE62spfN7GY7+9nQjra0p03talv72tjOtra3ze1ue/vb4A63uMdN7nKb+9zoTre6183udrv73fCOt7znTe962/ve+M63vvfN7377+98AD7jAB07wghv84AhPuMIXzvCGO/zhEI+4xCdO8Ypb/OIYz7jGN87xjnv84yAPuchHTvKSm/zkKE+5ylfO8pa7/OUwj7nMZ07zmtv85jjPuc53zvOe+/znQA+60IdO9KIb/ehIT7rSl870pjv96VCPutSnTvWqW/3qWM+61rfO+/Wue/3rYA+72MdO9rKb/exoT7va1872trv97XCPu9znTve62/3ueM+73vfO9777/e+AD7zgB0/4whv+8IhPvOIXz/jGO/7xkI+85CdP+cpb/vKYz7zmN8/5znv+86APvehHT/rSm/70qE+96lfP+ta7/vWwj73sZ0/72tv+9rjPve53z/ve+/73wA++8IdP/OIb//jIT77yl8/85jv/+dCPvvSnT/3qW//62M++9rfP/e57//vgD7/4x0/+8pv//OhPv/rXz/72u//98I+//OdP//rb//74z7/+98///vv//wAYgAI4gARYgAZ4gAiYgAq4gAzYgA7oSUEAACH5BAUNACYALJwADAAdACAAAAahQIBwSCwGjshkssgkKp/IphQKlTapTysTq9QauVHvEBwWA8hHsxAdUJ/RxZGGUZAgBGO40FJ4gvBvZEIRAwEHF0IfCQEkeGxCBIZFHwEYjnoNARZbAYiPF0cSHKOkdBWBYAB8XKePFWlan7BWjwKZHbF6CpUbTIi2eh59EJuJIQ9reqqFTxCXgkQiE8MljcnQZrXZymLa3dxe3uHgudjfZEEAIfkEBQ0ABAAsqwAMAB0AIAA
ABotAgHBIJAaOyKSyyBwqn8lmcwGFSplV69WYXXaUFmw3WuGYqeHiOErEBNLcdaCYeIvldLtaPm/r42t5cE58gneBfoNCfH2EigBUeImHY4Z7kkN1j4yWgJWTl4hDEgEklF1FIgExFwKeqJcDCRCLhUUXIAVHPrWYW5C2v8C+W5Giv5zCw8fFwcjOzXJBACH5BAUNAAQALLkADAAdACAAAAaHQIBwSCwGjshkssgkKp/IphQKlTapTysTqxQakpwrNwo4cTjf8HZ8LKYCaiM74IaL2fW4c56/j/treERvekNzdIN2gX+JhUKHgHKCQ4R+XJF7k0KVi5eNllhFFW0CpYZ8ciKmp5qbSBgDHY+obhoLRwizrVqQWpmMvrrAwb3BAMXEtMm7VodBACH5BAUNAAIALMgADAAdACAAAAJFhI8Yy+0Jj5stRootxFQnPnkKWIkGWZooY55r0AIv3M6xXb+3nq+730P9hEHS0FgEHZVJztLZzEQ701D1cU2JcCped1UAACH5BAUGAAMALNYADAAdACAAAAahQIBwSCwuAsikMllsEpdQptMZjU6p1eW1mdVun13pV3gMB8ZDMxJNVrMB5TDAEehwGI8LZYL3tM1zSxMxBUchf3J0hxeGJh4NCxtwbnQIQnwolwEXk4B0FZofQp+diQEWo6cCgaBxXaypoLBqZ6QCO6qzlLm2B6q0sMG+osC9vL+7qMLInseywwLFzrGlr2yuWW/A127cgN5y4NZo2FXaakEAIfkEBQYABAAs5QAMABwAIAAABqNAgHBIJAaOyGSyyBwqn8hmEwqVMqlPaxGr1Bq5Ua8QHBaTj+LxOQ04B4ikW4GhEW2ca0Bk8tRE1GRCEgETFgICFA8BDoBgAi2Ed0MRDAEXbWslAXZFHAEpmIEDhBylphiboY5gI6pcJkeGVm6YLVq0PAEKt2sdR5dFHoe0AoMDNJIWCgx3xHtUzXlCKZ50EiLAxGbSXtrd3LyBad7hjuPgs2dBACH5BAUNAAIALPMADAAdACAAAAJFhI8Yy+0Jj5stRootxFQnPnkKWIkGWZooY55r0AIv3M6xXb+3nq+730P9hEHS0FgEHZVJztLZzEQ701D1cU2JcCped1UAACH5BAUGAAgALAEBDAAdACAAAAaxQIBwSCwGjshkssgkKp/IphQKlTapTysTq9QauVHvEBwWA8hHsxAdUJ/RwwphMljEThQnHGBJFpAqY3s4IR0RQyJHHmt7Ak0hASSMZFoyAQSTYEQvEnVJJ45sQh4xRzE3HBylB5lcQg4BGnmIl61YjLNDBLVvlBdpRQa8ohGKRB9HILZUQnQSF0IqDH+svZp8C0o3lsrWrqMlfwYiGy7DjWKiaupm7OnoXu7x8Fry9WhBACH5BAUNAAIALBABDAAdACAAAAJFhI8Yy+0Jj5stRootxFQnPnkKWIkGWZooY55r0AIv3M6xXb+3nq+730P9hEHS0FgEHZVJztLZzEQ701D1cU2JcCped1UAACH5BAUGAAMALB4BDAAdACAAAAaKQIBwSCwGjshkssgkKp/IphQKlTapTysTq2y2JIwjT8hdFj/IWGxGLh+ZoYBCYHQHmJhAZWvnW/huQjZYIW2BAl8cHEcTihwjhmV+gJJ1OJRck3WHTnqYWJqdnEMFnpuVnZenmZafVKGkfa2roLOiqLGqt6xasaO9dne9vrjAssMAwciRvMPKy8FBACH5BAUTABQALC0BDAAdACAAAAahQIBwSCQGjsikssgcKp/JZhNKlTKpUGsR+9QauVGvEBwWk5Hi8TmQBqzZw48jsYgpLs718AS1qM9CMgEYKhsAEXwYhm+HDAs4RRMBKW56NgESTC4BB5WAIIMcoqMGmJ5kABxcD6dgqZNajKAEsZamVoweC7BMEa1cgUcuGUIXIgMVv1h7VMmMcSAYjhA1fspVac9metnc24Dd4N+o4eTjYEEAIfkEBRQACAAsOwEMAB0AIAAABqRAgHBILAa
OyGSyyCQqn8imFAqVNqlPKxOr1Bq5Ue8QHBYDyEezEB1Qn9FEyinGGDgsTrgQtXiSxnoVRy4Ze30Ua3oaAX9EOgEHiWQAFGlFFgEYkmAAfAwcoKESlmwCIpxvk6chXqV8Gq1wOEceWqUCBpBNF5tcQh9HJ7wAETAaI71YQ55+AqmoQhcEuQUaBzQbyVRut2bdYt+xk2rhtnre5+BoQQAh+QQFEwADACxKAQwAHQAgAAAGiECAcEgkBo7IpLLIHCqfyWYTSpUyqVBrEfsEKJZXbtTGKR+nYiTxcg6nA+v29g0fsuvzdxxvpO/RenZyfYFCd4Bpf26JgnxOfo2IYop5jIaDj4UAHphCdI4LARSLk0QcASEblaVDFqELGBgEnpBEFhIMRye0mlqfWoSWwL/AvMK+tcPJyL1Wn0EAIfkEBQYAAgAsWAEMAB0AIAAAAkWEjxnL3QmRmyxaSm3EU0PueApYiQdZmgC6qAYbuCsrwzXt2jmu6j1v8gWBImGR6DEmkRplk3mBbqQf6gh1w+60P+4QVQAAIfkEBQYAAwAsZwEMABwAIAAABsBAgHBIJAaOyGSyyBwqn8hmEwqVMqlPaxGr1Bq5Ua8QHBaTj+LxOQ04B4aWUKEQoqACITVZ+FkoCXh6YGo7EQAeEn55bWsuARgmQxEMgYx7NwEyRQ6VbowtRTKda0cWoaN7paeLng0BoEQiqIOJmkQls1wAjjECvoeUrGsZcwcbhxzBgroALUgxfiy5WHBydHbTVFaiNcvUUg+v3trNKhm/0j7HloO7RxgafgWm41UAFgTxCzEKF197aTwFHGbmTBAAIfkEBRQABgAsdQEMAB0AIAAABqJAgHBIJAaOyKSyyBwqn8lmE0qVMqlQaxH71Bq5Ua8QHBaTkeLxOZAGrNlD16TRmLxMzvVwrnxs1GdCM0cVAgIVBQEEgGQAEQsBLUQdRxluepQFeF+SbwAyARyGRA8BIpeBBKFMDoqojaogrK6eoBKzp56Zo3mdeo+Rk5WvYIKEhoiuxFx7TxB/y1hEL3x20NFVaZ7aetyB3o3gxeLM5NLmVEEAIfkEBQYABAAsgwEMAB0AIAAABqhAgHBILAaOyGSyyCQqn8imFAqVNqlPKxOr1Bq5Ue8QHBYDyEezEB1Qn9FEHAizwBwoTrhQtXh2xnoXfScZAhEHRx5rejUBGgJEBgEji2RCkjZFL46VYIsWRRVpb5YRiaGnbACmAReoraSeG0egcaOqADkBJJqcsVxCLL5DkjqdwKsMAYSriAyuv1hDfH6AlkMWc3UgeNayZrjgeuLX5N9i4ejj6uXsYEEAIfkEBQ0ABwAskgEMAB0AIAAABp5AgHBIJAaOyKSyyBwqn8lmcwGFSplV69WYXUovR2HX2wQvxGPk1YxOB9ZhgDtejs/fQs8Jw5jQ2HJzeQNKBHaCAA8BExdCJ2qBbgAWR41DGoeSMotFL5lpiQEsRRWfY6EipKZdqKpnkaAOoq5toJsSnatZk5VEBrpVQoqMjpB3g4XAWgB6CX0tgMdbTojTsKfW16zZ0tbd099b4Vd3QQAh+QQFDQACACygAQwAHQAgAAACRYSPGcvdCZGbLFpKbcRTQ+54CliJB1maALqoBhu4KyvDNe3aOa7qPW/yBYEiYZHoMSaRGmWTeYFupB/qCHXD7rQ/7hBVAAAh+QQFDQAJACyvAQwAHQAgAAAGoECAcEgkBo7IpLLIHCqfyWYTSpUyqVBrEfvUGrlRrxAcFpOR4vE5kAas2UIPwcBgGE4fgfocHyAxC0dOa0IgAQ8XQx8Hg3wAgHl6V4SPAQhWb4VHLhSSX44ZD0k9JCaSmYosokccp5RMMkcte2RWDgEjtGC2uLpcQjGcki9HFr5YtH6bjbUAFSMPgAM8kcdVaahmr17Z3Nta3eDfmONSb0EAIfkEBQYABAAsvQEMAB0AIAAABo1AgHBILAaOyGSyyCQqn8imFAqVNqlPKxOr1Bq5UeLLFbmCj1+c+Zw
WbM+B9pv94pYB8HiLw18EJnx8GUJ5cl9sTgFqc2CGiYhDBYprjYmLh5WRk4xcjpqQhJuYnUQxAS2UpEMgfxucWEUWfgwaDxODeHBMFRwDs3eFXp+ZwsHCuaBexsW6x8jEys3HeUEAIfkEBRMABwAszAEMABwAIAAABqJAgHBIJAaOyGSyyBwqn8hmEwqVMqlPaxGr1Bq5Ua8QHBaTj+LxOQ04B4YRQo4x4LS+ZCGl8CQ41yYGARMXQi9Hd21rLUceRAqDanmQDkV7ARGKeTcBIlcBFppgAIIon3dupJ2nolyqnlugrVgAlJaNs1QAjAuOQ5ATkqOBg4UmNojCrgCXSgp/eUJxMQsFdnjDZoDa0dzZXqlp4d7L5LTiZ0EAIfkEBQ0ABAAs2gEMAB0AIAAABoJAgHBIJAaOyKSyyBwqn8lmE0qVMqlQaxH71Bq5Ua8QHBaTkeLxOZAWrNnDUAAlSE0Yg5DlTZTDTkojfHEBcjUUERYuIoNCcnNXa30BMQKRZ5MslluShCRTnY6Ql2STMKCYnqilqqRgpquvrZyponSuXG2NXrtavVa/UsGxuWnDuFhBACH5BAUUAAkALOgBDAAdACAAAAatQIBwSCwGjshkssgkKp/IphQKlTapTysTq9QauVHvEBwWA8hHsxAdUJ/RgFaaCQoQ3uR14LPd48FCNXZFCAEGenkAFHNDHAEuiIBCGn5CEQsLHpFcQzYBHJ2fY3CWmBGTlX+cjY+KjKpYRHIaADKDo4mjFDGpsFRFgjeGAsSbsUSLRzJfuawBFMySRLYTfc2oNtbSQskZ2qusB1ekIryvTuRHHNDj11psbvBqbEEAIfkEBa0AAwAsDAAMAPkBQAAABv9AgHBILBqPyKRyyWw6n9CodEqtWq/YrHbL7Xq/yYF4TC6bz+i0es1uu9/wuHxOr9vv+Lx+z+/79WCBgoOEhYaHiImKi4yNjo+QkZKTlJWWg3+ZmpucnZ6foKGio6RqTqWoqaqrrK2ur7Bmp7G0tba3uLm6frO7vr/AwcLDm73Ex8jJysu4xszP0NHS03bO1NfY2drENU/b3+Dh4q7W4+bn6OlvKmTl6u/w8eju8vX298MAaPT4/f7/tUJEAUiwoEFSAdLwO8iwocM9Cx9KnEixzAwpFTNq3GgB48aPIAESUNgkpMmT8fSVETAQpcuX8iLCnElzmsyaOHP6IkFFp8//n8Ii9ARKtOivm0aTKh2FdKnTp5maQp1Ktdqlq1izat3KtaukqmDD9nESoKzZs2e9ql3Ltq3bt13QyjULt67du3jzXp07V6/fv4ADC6bCV+7gw4gTK65bGO3ix5AjS3bUOO3ky5gzayZcuezmz6BDb+7sWbTp06gDkw6QurXr12tXw55Nu3Yk2bZz694NBjfv38CDP/EtvLjx4sSPK19eOznz59BNO49Ovfrk6dazax+Mfbv373a7gx9Pvqv48ujTUzqvvr17Rezfy5/fmzT9+/gHxc/Pv3+T/f4FKOAQAA5oIH8FHqjgfAku6KB6DT4o4XgRTmihdhVeqGF0GW7oBKFyQQAAIfkEBV0APwAsDAAsAFwCYAAABv9AgHBILBqPyKRyyWw6n9CodEqtWq/YrHbL7Xq/4LB4TC6bz+i0es1uu9/wuHxOr9vv+Lx+z+/7/4CBgoOEhYaHiImKi4yNdDEBFY5xkJJ2Iwp4LRIMAQEhgphzBp6eHJNxJwEyVjieAmSdkUUYniRVlahvuZczSRYvDgOeFEUhC6vGplUfpTExvoGiRxcyDwULCSCWRM2l3yNTBxwcpKdZLy4RSzgiHYURLi9NCsnMr1vxLkayIN2lK6qEMGBBl5tyBXshiYCs4SwiPErBGoLtk0B7hKYVqeAQ3MZvHqvAsJgFG44lJBY4KEQBH0oDt+4FmJilJQN+AUhtGMIiJ0b/g0DNxNPQ8MGIC0QYcnBHTFmOhwCaQdqBCyqgoUWPDmnGQcWFCBU42PpnIIPZsztFksRiEuVaQTb
NWHBZk66QTiOgYnggNmDQv2PmZssRQ+wCGwubQlyF8QCDvFSptA0kWMMzw4iFXChGKxvZLineWpmcZIVoQHHLpNaymmIkBv6iruq7lcWEYdA4//vGjYgNiy9IYQh35IOwbAqQRmnZNUEBVnkHzOs2rkEDDVqJxN1k/YCHKPVEJJGxgIAQwaV6D/m90oVw4kU8EIB0/cQHycmmpbhPTfGQiDaUY8JdDohwmlsgFYXTVyMI5103PWBjAAvrDMGcac8JEN10UZAC/90JQuyH4IDn5QTagUNEUAN9XRXxQkcgVQiALAn+dN4JLCbkokVhSZiCEETFpJ0nMjZxXI05FNERK0oEhxsBusk202ISpLUeJwxgZyUARyYYw4yRgHATADoE4AFtUiaowkbkGKZeiJ+I6VA0iyWooxM2fdOlji96GaVNKTmkwZZMoIAilwEgcCM5nbwJAHtyNgRiUhUVUItnUlyAqUZJaGoVojYYWlALibpwKBKbMJpTm/C5ZsFtDvFHpp8WJrjncptm4gQCdEl14jn9JWjeVm2a0iawXJIj3LEcwgmjkOt9Eto3EMA5gRHklQBesXGSM+l/qjIZbIKf/afSlhHAKv+ojNaoeiyIjdIwy16g3tgBu/515uhvjcJCQL7kYfAOACqaCIVNBakySw0YkWBfWheIWVat2aCw07RrPtFaZ8oRwcuOWVry7wIdI3pDx8ZNIZgQnCb2aQnJJKPKBqbGNgVpRyBzzAtfWfCwlDFRcNu1JS7cFMPi4mlwy0r8+8Bulw5X5BShIdvZBMr12ULOnxphGg+/2HJWnyf55lMIFYCVQmwMmakvFRsjEd5CkGjATQpfUkziMU87lZDQp8XdL2xzzYMmTYtB63HXj3qytWuKaobMnQJAkOjBBvsqm82Iu0oxC4t964R1U0teQOcAfGy2VbVEDiTjmR5NZ6f51sv/yQYNgFJzVWUj0VCUiw9rOn8t5a05R5w30bYkTLuccdEgYQB8FFWjisx3PB2I8xFfJxHkjgeAnDxNwiS9Mtx2yW1jtAOYFfaUwhBNlvumlwyA4Eb/+93h3K//+r52EZO42IOtUxmBOSUa1P0MyAkaUCxK3YOCWPhjoH1s7giqYx/iBLg4113BMY/hgehcBrydNa5MazJQZG4Gu7uUh4TKqEECZ7jAKoHHEzUIwQiTEJEbIA4YFlhHNZCRNypU7wj1WMn8FuQWUASLUHPBAMjsVy6W+U9pY1rC3DB4Rb1JjH7mEo8yfjSkLHquA8uo15UqhUMkZNBaRfgX6IQgsWMp/+tcGsMjAmuYlAZZ5xtkXOCUtmLAAopxaFZMmucK2LfszdGKz5meOL4hNdq9zB4NMcvu8NM7rj3ugJgymw33SMpCHiFSOYFSk9JXnLEYEUXXECMRagemTvZPW/2DjR2vMUg4KmEYOtoe5szIwyvS8iOfcBwXNbDLWsiyfm/DnT3Q5AH6FOaO4eMiAJXoSHPVKI1KO0cpqwU9A0hAVTDQWxV3RRKf7OQYjnpj47iZyM6UQgJ+qQIFrMEbGCbudU7cJAsppyTYofF0O1LgOCUnv+XwMz3WY4AtvYdHqsHyctFcHEHBp75v2kWg6huW5tC3AHr4z1OlQ2ZRwOhCj7ZKkP9MfBSFXheT40AQRfIkIBF68khONEufJFmoCx8URnUSUoFPaJtsVDCvXmr0lnFcXwVso6ArYIICNg1WCTE6VUuAtEMtrGVEGclQGmqqoVK4KswYUDpvOEp9NrSo1SCHk08usjQoUiE7x/cRhHIQqMQMqVbZYYpqBmCHx3xiRv8pTeD9i6+pY5xOu0nHLmJOnEHNXPpIYTFoHlUKT5GPFKVzVjdKVjQ8bZoyrTANzg6WsRzl5C/d1sqSJrSsfMwtFVp7GGQ68AlJVMtcaVrXmOIVl14zWKEKSZStdYKKwzSpIodHWLcJxq5ibQL+JEkblHYGsjlFbcOU24rM+jC3vDr/b4pcCdN1OsExH5hQskg1u6cml56
zeiQSymfVaDjmpdSFrSh5N1voEqx2xxiWUEtbhWn8tWjplGBFpcDURkYVv4IpnTBjC0qSNeGrt6zwFTwlXdNa8H0T6Un7QodF4zI2X6TC6Wnxm1rTeTBFlzWrZhNAIkh2tr2fpR5j3sHUGWDUCOGlsWXV2OBJ3UZxAS7qfQmchOtRVHiCJJ55cbtbXT2ZYidmZ1ir2+PdYA+S2mzilanBYUt+2QoesLIWrxiwlBp1A+Yw8wE7F2fa3pWxQ8MeU7EB3hlHVb8bct8Q34qEBS+AaG0bVgYCduSNkQqpeXzugeWMZEPv9CcxUEe0/8ZcHA1EThSBsp9ZEAZGmGWmzWDd6J+LMOigBRq3QtWYqXeCasV4yjxoYal00qY2IqIOCng5tjmyxl4qEcq3ET2Bcoby0hdBVhmsBO2SmYwTgkykA5h2RS/F7UEsfdJntu3MSzVtBDShpyJGWUu6uDWBNv14svWsE4xkpd0t69ZUIKHqjy0dSmQrlygI1cyxOlFvctxbvPoVlKRU5oxaOKe3i0NSZV89YNlW2c9IpBWu/Z3pnFicvfz6pm6+ielXemIAAivjkhpdFL6k1DDXDHPjilKKapsyxteGgoEGAwHOgaBNF++Bt9jkJfdGp2Pp+uazhw7zemd3jLXxEc3k7f/R8SpZvzeyuFFkHTbM9o3BIeLlA9ZkQs/6Eqzw6QmyPPVNHUTr691okNj57VB1DcABBFVX9DYOVY/7DuSlllANiuRotCrvoTq7U6BUTizhFACfz6Yawo+smfnobGCowjniefKUNsbH8/XB7jxHR+pd+T3jGseW2pPjXjyLZm24waeBV/B6wPg+FTv8vRhYEHzht4EjCTe+8pdPh+YxnwvOf/4ZhAF26Vv/+mOIPvabvP00HNTA3Q+/+JfD9/HDrfzmzwd9Xpj+9rv//fD/w8ufGf/62//++M+//vfP//77//8AGIACOIAEWIAGeIAImIAKuIAM2IAO+IAQGIESOIH/FFiBFniBGJiBGriBHNiBHviBIBiCIjiCJFiCJniCKJiCKriCLNiCLviCMBiDMjiDNFiDNniDOJiDOriDPNiDPviDQBiEQjiERFiERniESJiESriETNiETviEUBiFUjiFVFiFVniFWJiFWriFXNiFXviFYBiGYjiGZFiGZniGaJiGariGbNiGbviG7td13wCHdPh/cigRdZiH+neH2aaHfph+fOhUfziI5heIhIiGYgF+hBA36kN2TWCIh1iGyOMIjNgfccVCdxiJZXgbknQIlXhKrXd4maiJYsgRFrYIn2gEXFEFkEiKYPhfk5CKLmZwo+iKX8huxSEnMKcDkmQc/09CRVMFK7lxeAzyFBhAVFnXCS2iN9xRAMhoBEZGf0/QirbIhSOVXBV3GgoTIzL3R83mKoJnFWXyLGDWERNTeFJAjdWohZBxZfSTAooUML9VMFJELPcyamfmGjvTMz+zczJQIVj1auLWAJ2FMTSXblGgjuuIhfxTRpm3Xp/yZsUEZb/jPEpgE/rlGMVnApODiXK4kFzoWq+FBPiWb+OxPsiBV8k3kqtnWqr3iHwIkluIDb+1TChgYHW0SxNGM1iiJ8UFV2RWe3SVjjEpk1nYLwfZc3zXk7FHMNbEF+VwGpwGjUtGcC3XQR7ZdUaZhfLkGwpACs0WIT8FinbTcbPkYf+CdZF2cWkU9Ty0+JFbeYUSyQ47wGkP5k/ZMz5TeWFBGWSelJUeFZdXCIvTCBUvco4uQyh5VlDgN2gPWTR9hZdvqZWCWYWGol4wyQ19FkhJEWVSopdoeXjSSD9WOVaAKXWVSYWV+CopUCH0uCWJphkGwg23ZgKDJpWjtzr/2HlCUpromJBFmZpTSAqO8m6VUpPeZCcUoyd6xZhzRo5uJxtX+T/4UYvCGYUlmYzI0Yk9Mhg1QDlYpXUgtpe5uHdW4pvxUXDAaZ3X+YRKZX2mAmCFyZ7t6YR58WPMB3MsNZmBWZ9SaCnH5nuGonPrCZf+CYU
fIAKKGBTw+JgwSZ8HGqEadaCQElqhcUChFpqhbIChGtqhZ8ChHoqDQQAAIfkEBSEABwAsjQBsAB4AIAAABspAgHBILAaOyKSyyCQqn8mmFEqVNqlQKxP71Bq5ycvxIhWDl4uARVo5L2OBjhQRMLiPgFDAJXUFHHcBADUBBEI0Rx9CCoWBACR/i0ciQnoojm0alQwLDkJ2bXcAZp8SExhCnB6OG6qrLIwRGXisoB9xKWoUdQBpbpsVIgEXuy63O72ig3GAAiYMICt7yb+PexgcpSPTrG16I5J6a6xmcYdIsqzUZKNIqcoAGrxDBfPqgCFEHJHUZ15O8P6pExjQy0CDBbUcVJjQSqAgACH5BAUGAAUALJwAbAAdACAAAAadQIBwSCwGjshkssgkKp/IphQKlTapTysTqxTaAg4tN3kBfMPWcdQMFqsDAjZ6+o6f43S1t/0yMDAjG0NvcHIgSjODdWx/FXEER2UAhHsBLU6WQpSMeEKHMpqLd0SQLKF6ckUsAaaTom2krKdje3NCq62bo0O4s1y1qrKuqLu3wrqwvMevtgC9w7RamKjSvljVitTVm9jc24vd4N9qQQAh+QQFBgAHACyrAGwAHQAgAAAGqkCAcEgkBo7IpLLIHCqfyWYTSpUyqVBrEfvUGrlRrxAcFpOR4vE5kAas2YAIKmRgDG42k3MtDEEnGWpnQicyOHoCFhoBMwJufFI4R45vWgsBEY+DQhYnOXZJF5pkQi5IGhypR6KVFEcuG1+sfCOMRRGro2AAJwEyRS25lQS2RBDCfB+vjh4loLObfkcDR8GYulxDLnV3Fo/QpGmV4pBm5V7j5pvq4ey75GdBACH5BAUTAAgALLkAbAAdACAAAAaNQIBwSCwGjshkssgkKp/IphQKlTapTysTq9QauVEizWW5go9FUOBlPqfXgu058GbL3RRuRzinXziAAwExgIAVfHN1bWCKd4xEO3COXI1fbpCSlo9DkXaalJieTomhi6CcAQSmWEUoaJOslgYPEIcAfUwXBxgLR3u3pF6Il8LAxMK4xcabyMHNx17JxX1BACH5BAUNAAIALMgAbAAdACAAAAJFhI8Yy+0Jj5stRootxFQnPnkKWIkGWZooY55r0AIv3M6xXb+3nq+730P9hEHS0FgEHZVJztLZzEQ701D1cU2JcCped1UAACH5BAUNAAoALNYAbAAdACAAAAaxQIBwSCwGjshkssgkKp/IphQKlTapTysTq9QauVHvEBwWA8hHsxAdUJ/RQ9pBwygYdBEnHPCiUsZ7JDMVG0IXIAE5Amt7UwF/b2RaEgEdjJKGOhMFSgiXYEIVdQsGEhwcnCifXIwHeUOoqpGgogUmRQaVq1h8ATdFEQu6s6wtiYtDLEcwu1QAwQEEhRkySLLCmH1KLLHNVaEPdQ8qACHDbG7oaupm7GLuXvBa8lb0jmRBACH5BAUNAAMALOUAbAAcACAAAAafQIBwSCQGjshkssgcKp/IZhMKlTKpT2sRqwRYjlrukgK2iqO4svQMThfCbK96GncLRAZGTLZx1o8HShN9AGwBckcoAgIVBQEji4aIBEQqZZJuFFsBFYV/C4RDE4+eZ4xzQhykmKgAIaufoakBJAKsnJudt5RDlm+lYogLiqekwFyTgoS3dzl6IqGSWn6m00LS1tjT2nDV2XHWx1jh4lRBACH5BAUGAAcALPMAbAAdACAAAAamQIBwSCQGjsikssgcKp/JZhNKlTKpUGsR+9QauVGvEBwWk5Hi8TmQBqzZww6nwDCwIs71UPEcXNRnQipHJBsAFBMBEwICb0IaAQREFwsBH256HkcURTyRmIEVR4xEMoqgZAI0o0Ukp44drKWveqILJq20gZoBnEQhn44AkJJDlJaoYIKEuIinyVx7fb7CjHJ0MTV4gKlp1V7fWuFW41LlU3re6WZrQQAh+QQFBgADACwBAWwAHQAgAAAGlUCAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6vUGrl
R7xAcFgPIR7MQHVCf0UJbwFFkBVhrOEBOJ9rxb2Rxc3V3eYJ7hH6GgWCDfUN/h46JkEKSjVyPhYBsm4udeiSKkYyelUUTpnoVaUMUR6GIGwyKFBoFq7MiShIEupQCAi8xDDEyGzXAmmqnZs5i0F7SWtRW1lN6z2hBACH5BAUGAAQALBABbAAdACAAAAaIQIBwSCQGjsikssgcKp/JZhNKlTKpUGsR+9QaudFhxOWagpFEytF8DqTX17Z7qGawzwAH1iCUu2UcgUeBgQd9cm92cW2Jd2CNi3h0cFuIk4qVjJeOXJCZkgICdZxYiRGRj18joV+aQyJIEw8gh66vEAxHMbWgXn5eTpbAv8AAxMPCvslax8ptQQAh+QQFEwACACweAWwAHQAgAAACRYSPGcvdCZGbLFpKbcRTQ+54CliJB1maALqoBhu4KyvDNe3aOa7qPW/yBYEiYZHoMSaRGmWTeYFupB/qCHXD7rQ/7hBVAAAh+QQFBgAEACwtAWwAHQAgAAAGjkCAcEgkBo7IpLLIHCqfyWYTSpUyqVBrEfvUGrlRrxAcFpOR4vE5kAas2cROKLHAgCrqM5EDpbjXQw5HKhtCFRN+b4ZHFlKKAAoBIVaPGgEvlIACAwEdmXqbnZ9kQpaYjpqRk6igFYyspEKCBYSGEomapX1/oENydDEOH3mxZrlej8a9yMdayczLzs2jYEEAIfkEBQ0AAgAsOwFsAB0AIAAABohAgHBILAaOyGSyyCQqn8imFAqVNqlPKxOrBESUris3muGYB4HwdnwsHtJithu+lhPfovh4rjayA3x6XIF1e3d0fnZDeIJYhImGi4hOf4+UikKMhYOHMo1URSIBDp9VRBZHFZuOc0cYCR5Cf4BMLBhIF7KVWpeRvACzv0PBwsC7wsTIx7/JzH9BACH5BAUNAAIALEoBbAAdACAAAAaEQIBwSCQGjsikssgcKp/JZnMBhUqZVevVmF0SPUdTsesdggObMRlZjGgMYu4avXXO6XXAne4q2+dCHRyDYWqAX4Vya22Jf4uIeI5kjJFCe5RpipOQcZJdmIaPZo2Wd6Can5yhkxWDhAESrhZ6pghdH7SHeZd5uaJ1vLumvcHAw8K6xnNBACH5BAUNAAMALFgBbAAdACAAAAahQIBwSCwuAsikMllsEpdQptMZjU6p1eW1mdVun13pV3gMB8ZDMxJNVrMB5TDAEehwGI8LZYL3tM1zSxMxBUchf3J0hxeGJh4NCxtwbnQIQnwolwEXk4B0FZofQp+diQEWo6cCgaBxXaypoLBqZ6QCO6qzlLm2B6q0sMG+osC9vL+7qMLInseywwLFzrGlr2yuWW/A127cgN5y4NZo2FXaakEAIfkEBQYABwAsZwFsABwAIAAABqtAgHBIJAaOyGSyyBwqn8hmEwqVMqlPaxGr1Bq5Ua8QHBaTj+LxOQ04B4QUkQSzSPBaX7L6OFgcFQJ7YEITKxeBJjQNASqCXFYvASWOWFYVAQaUVEMdPHRJGJpVJiF8ExyoaG1rNkcfRBaqbgAPjEUtsmt0FUUEuXoatkMeiwWiWSyYFnAGBb+DEZ/FAwjPjwAeCnQYCh6xA8ddabPja+V654Pp12bm7ejvYEEAIfkEBQYABAAsdQFsAB0AIAAABohAgHBIJAaOyKSyyBwqn8lmE0qVMqlQaxH71Bq50WHE5ZqCkUTK0XwOpNfXtnuoZrDPAAfWIJS7ZRyBR4GBB31yb3ZxbYl3YI2LeHRwW4iTipWMl45ckJmSAgJ1nFiJEZGPXyOhX5pDIkgTDyCHrq8QDEcxtaBefl5OlsC/wADEw8K+yVrHym1BACH5BAUNACYALIMBbAAdACAAAAahQIBwSCwGjshkssgkKp/IphQKlTapTysTq9QauVHvEBwWA8hHsxAdUJ/RxZGGUZAgBGO40FJ4gvBvZEIRAwEHF0IfCQEkeGxCBIZFHwEYjnoNARZbAYiPF0cSHKOkdBWBYAB8XKePFWlan7B
WjwKZHbF6CpUbTIi2eh59EJuJIQ9reqqFTxCXgkQiE8MljcnQZrXZymLa3dxe3uHgudjfZEEAIfkEBRQAAgAskgFsAB0AIAAAAkWEjxjL7QmPmy1Gii3EVCc+eQpYiQZZmihjnmvQAi/czrFdv7eer7vfQ/2EQdLQWAQdlUnO0tnMRDvTUPVxTYlwKl53VQAAIfkEBQYAAwAsoAFsAB0AIAAABpJAgHBILAaOyGSyyCQqn8imFAqVNqlPKxOr1Bq5Ue8QHBYDyEezEB1Qn9FFD0vTYExEkTV82GI8HXpkQzgLAQ4XQhEyIIFgQxKGVmyJRziSe30MWpMAKAEGm3ueoJeCAJkCpY4AHpWqXI+RU3ushYeJLweNsHx+Sju7WHFzfnd5b6ZmnMq0YsvOzV7P0tGhydBkQQAh+QQFDQADACyvAWwAHQAgAAAGnECAcEgkBo7IpLLIHCqfyWYTSpUyqVBrEfsEUI5a7vLLCIuRArL5HPCCrWywGh53l1uSRuEQcdbJK0oGG0JxbWQNKCYbKQsBKgIAhmlHLEQHASeFf0cURCgBEptsdoRDLaGjZ6VEqBqqYqynqZKcZa20k3Ozr7Wku0KusFyywbm2RcK+q8AAypNafqTRw1jU1VXX0NTb0d1rq9pxQQAh+QQFBgAKACy9AWwAHQAgAAAGsUCAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6vUGrlR7xAcFgPIR7MQHVCf0UPaQcMoGHQRJxzwolLGeyQzFRtCFyABOQJre1MBf29kWhIBHYyShjoTBUoIl2BCFXULBhIcHJwon1yMB3lDqKqRoKIFJkUGlatYfAE3RRELurOsLYmLQyxHMLtUAMEBBIUZMkiywph9SiyxzVWhD3UPKgAhw2xu6GrqZuxi7l7wWvJW9I5kQQAh+QQFDQADACzMAWwAHAAgAAAGl0CAcEgkBo7IZLLIHCqfyGYTCpUyqU9rEavUGrlRrxAcFpOP4vE5DTgHBBR0S9IoHCLOdbywUho2amQAew0oJhspCwEqAm16RyxEBwEngWBwRxREKAESllyDaEQtnZ9YoQxFpBqmVKiqpY6Ce7Csspe0o7Fur7q2vLlDq61VwULDt6DGAMi8ac5ma8/S0YLT1tWX19rZXEEAIfkEBQYABwAs2gFsAB0AIAAABrxAgHBIJAaOyKSyyBwqn8lmE0qVMqlQaxH71Bq5Ua8QHBaTkeLxOZAGrNlDUY4REwkGAYt7PZQoT3h6b0IyRx9CKUiCfHcBKEQshntnABVHG0QUkoMkAQZXeZNkAiIBHKCLlIWnW6GcnqiiYAKWDJhDmq6MBY4CvgA6m4yFBRWIC8ipo0I3SHgEgY2UcTELBi6NFLJcWpfbWFaWn99VhNpCFtWP5FlqdHimToykIRjIEim/7F1pg/3zXt4EAQAh+QQFDQAmACzoAWwAHQAgAAAGoUCAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6vUGrlR7xAcFgPIR7MQHVCf0cWRhlGQIARjuNBSeILwb2RCEQMBBxdCHwkBJHhsQgSGRR8BGI56DQEWWwGIjxdHEhyjpHQVgWAAfFynjxVpWp+wVo8CmR2xegqVG0yItnoefRCbiSEPa3qqhU8Ql4JEIhPDJY3J0Ga12cpi2t3cXt7h4LnY32RBACH5BAUGAAUALAwAbAD5AUAAAAb/QIBwSCwaj8ikcslsOp/QqHRKrVqv2Kx2y+16v8mCeEwum8/otHrNbrvf8Lh8Tq/b7/i8fs/v+/VggYKDhIWGh4iJiouMjY6PkJGSk5SVloN/mZqbnJ2en6ChoqOkak6lqKmqq6ytrq+wZqextLW2t7i5un6zu76/wMHCw5u9xMfIycrLuMbMz9DR0tN2ztTX2NnawA9l1tvg4eLjnwuyTeTp6uvscyYFA2JQ7fT19uQCad/3/P3+wAzY7PtHsKBBXQMPKlzIkJSAhA0jSpyIJwrFixgzrsGisaPHj97QgRxJsh8BU09
KqlxpD0BIiCxjyszWrQDMmThzDoPQQIwD/zI3dQodKiwo0aNIbRlNyrSpqqVOo0r1BHWq1at7LmndyrWr169gw4odS7as2bNoOWJdy7ZYkwBw48qVm7au3bt480qayzeu3r+AAwse7KRvX8KIEyteTNYwX8aQI0uenMjxXMqYM2veDMUyXc6gQ4ue7Nnv6NOoU+ctDVe169ewv7IOELu27duNZuPezbt3F92+gwsfzgQ48ePIhRtPzrx57OXOo0sPDX269euRq2Pfzj2w9u7gw6P9Lr68ea/kz6tfv5c1+/fw25eOT79+Zff28+v/kn6///9v4QfggAQWJ2CBCCYoRH8KNlgfgw5GyB6EElZYHoUWZsgdhhp2OAwdhx6GyByIIpY4XBAAIfkEBYwAPwAsDACMAIEEgAAABv9AgHBILBqPyKRyyWw6n9CodEqtWq/YrHbL7Xq/4LB4TC6bz+i0es1uu9/wuHxOr9vv+Lx+z+/7/4CBgoOEhYaHiImKi4yNjo+QkZKTlJWWl5iZmpucnZ6foKGio6SlpqeoqaqrdhcBC30dr7MWhy8GDK8vURGzukgxvhxqwRWixYEOATZZsr61TC0SuQEhUsrMmq6wSyDLYAq+AYLOtEce4iKK3i5lFMoLr3MqCw+sTzbVlRYnwRgSKzK0klflg74xFTgojAfNDA0RDbGMmBUjBoxrC78dOaEQ17A0yK5QENEBTQQX7baE/IOtmUIOuSIiMfiq4olrGjNtY8IOjI3/lwQBJcwoc0iPjCkR9RzTK15FA/MONqJGowiGX17yOejysGgRHC5KlmEh7hWBgdyoGPxo5qrXMEuz5FKR5UBOJCmknuFg4C2UFfXQUAhat2+yu3IX+DUaQIGAKi21EUYSF8zOQm67LUh6qLLPagLrfDBw4BE1EEQszCLxRWuXHYiNkAhMZuICAhc2ZPjA4Wydy2r1khkQYLEXz1WAX7HL+UhetuRomxnMAFRklcWZZIYcu5JyypuZTg6UILsS5uu6v1Z/j0ouXLqF6AgQjHVW4Vhg71Iy2xpCrER4gJYVa6FBnHHr7YcFdVqgxx9+f/Qn2HiaXKdFeQgWYB4VFl7y/91G4YnxISAYatZcIcgdx157UuQyUQtD/ANTQPdtxYV+SwDmnxjhxPCYEhWwMAFxMZxAgRExvUNcD0UFOeQCRR5phAf9MDABDd/RcIAGGhowQgRJFBgmTFACBGYV2x3hJJEHSCnbkzk4cNEQ1MVTllhQwFNWTgwS0QJ9IKrzIHREcEljaq+c+YRqzwSaAQE+WJmCVXuaVYSWXDagwZdJ3NIlboxV6qMUjJqTBJW4WCmCokOs8ECXglKBqlMnfFBES5BKSlmmD0wKzIZF5FPpjk/gepUGvqYmwjRlwugcmTGYWYULcMoZnxCuEUEWC0XMeuWHKjx5QgQOEkHCDbBeK/+EsbqeQyEANUBInLNSdBDCsSDY+hUI+LqJpGImnrpspAPwQC9jKShZQAhezWplrWreC+UMub2pKcXldlHnnrJBywKrdAJ6QZVemgCAMvZaeQEFQ0pQ8brCIJEkPBx4Fa6Lj644Jq+ctlgcA6gBoNpPIQo9cC4hHCzEzEvK5JqnGPQ8xEk8pNqrySFXGgCe1OzpmJ9bNqAqyAxKg3OMOuNQKV1EzNV1oojuiUC3xPlCAGEvVOovooRqa+dV8aBgxYEzrW3Ek1ba+fIFQNH3Us1SyND443j2OcSfGjg6KF7SDSEDhEu8Q1Sg/qxGBA8dOf6SggDk/bepqQXzegOeT/7/0k2kNm5M4RRZOjXi1GgAshPb1OTUrcvIbnrc4gT971vSSADt6jjJoPwrbKM9+9d+9150FMBT83Lrwm0bYN0aNjZZvK+HcFcE4YuMcA52Anj+OO6mlbW6xe7pLz13+pVxMkYp73EPZj1KH8DuZ7zxnMB/RWDf39x3Ii0wLnXRekkEZzeA8QmBURsjiJ6cojxijSB19jiChlDwNgZ40DYT1Jl
z9OYzJelmPh7oQdFmF48DAsBt9XOhuarhjfrhLlRBnEBoAHDB6b1kdyfD4OOygS0aZg0wvoAKE7EXuqQpSgRw097XLHCVGaTGi54LI8yUCLN4mGsGUBwZoCJWAyRs/0NwQrgASQYHrDN24IvxGB8WG8IPD8KMdVaw3FcmgznNdQp0TfHgSqYwyVDdIDeMm2OwQNc6OF7LG1r8HX2gmIJQ7g87UFwkaPIouUKN8jEs4+R5qvGyD5TmdK9wGRNfNao8EqdNQhhNd9KUBA3hwAo0wyS6eimECZCgliuM25yYuMd6qbGQQ7RRGrnFGDbmjDA0sU8F0se6aUAAGrHU5slyqUxN3i8JhkKeD50Aj7kFcwJu2sYtv2nIH/bRCAQs1DN/9Ke7QKtidinAEtf5gFrus4oYqAoTH4iBa41TIxeVYX7UA0BxPulHWYOAAf54AWnccJXxGAY69Hc50FFjjP9ljFtKSlXBHHkyj6AEqRNiQoOtCcAHEFinfQDgTGjeJX2gIuNthtgu9o3vBDI45j2XClCdSQgvN52oO+uERwQETmiv2Bs9j9o5FlY0YB9U40/duARjWpONqMzTXR44grEiMjnvUiQAGlnVWHGubwiLG//A9891LkBR2zCkjqLg1jQmYKF2fBcVKtlNnV7KnVsMQD/bWlhcCtGXmk2jOsF6VkplqLNj/WxmNyvabDbIquVrTNbGlypcmrGlOelpaUH7MnggVq28/ev9EGSEiy4mXplzZV2fh1Yn5K0EnmXVVYbqT31FdoGBRSBTa3oj9XDJN6BNJVgxm90h4RFdinX/qTvNals/2Y+PYuXskYA2NKFuDm1wJd9uhXWw6YaOQji67xMaO15uaregrBWwPw+12iV8Dryfk8AmWXqEaeDpCbq17BUIhw/ZtrYJKZqCXk/Z0uQij7tVBGwwoQTScMyTsOLVritjfNX+SbSZGh0xfPO3GB7EZkZSwEX2wOO8pW0oJv8yJIeVQOApKKPI1f2vKTNcF1mSb7TmS/GE29ZHF2UXl8l6smmnJNlIlngK4SCWChGTNxPPuLlNuCgusawXIStYy0YWbzTHEGC69dHH4E2relAGM30R+rISXjMeBY21DTCEUnflkE+hgGRvhENAQA6TO+ciWMfqNMRcHh6o//Fs49m6FsFNgJrduJxKM5uLWauWMYlZmSlfJOsJTUFjYo4L61k81BkEiHFfLZhXRpLXvncec543XNgO2RnRStBj/G4d1jhLlpKFLaiGuayBx6XOr064WwEg0ugTK5s60vM2kpXNZNSCGDHxfCPgKHI/XRekh8ImtXw8vE4DM/rfysWvt8kUK2f/c0RfLsH3nhDvYvZRzsydJYpTlsSAu3e34nZBfBk6cI8A/Mtw6Q7EXdtpEO3u0HahsXpbHcYRTwPFAps2Y7PjDDZm+p7zrgmr77fEbH2YTlUShyH7LNyYKxBAem2kcbctStXxRS/rDi5onR4M571clcwzQLr3HP+FvPwNVDs+h+xk9PSHwiuLMhjeIYldnfxBe9jJ9tsYjw1jR+JYaVfWdE0at+jEWrvtFyose9utteUS1teLBjlRJ10qK+5cvsiE9wIO5j6qsxW3NVHAxt9NkbRvks4GvnrJR/5lrQHI4HhH+IpDGUhsX5jMoS3545MQUCROAINvln0GXoV4nfa6LEEdr2VH/dpIO0M2nRM+eKABm5Nj98xr3lvLHahRVZbp2zOXkpeRbYLKD+D2GZl9g/V9dn+Dce8ZGTps1Zz1rRdNbYBfvQC2YV3waMBfLoB6l2PPUPxLRfSz9kuscnO8sAL8AjvY9hbKcH8kpyzet1tgxnbX9Xb/5hZ3p0R8TUBZ3Hd3yAddv3IAA8gnwDWBcYV8phR9XCEkdkIoqPdx2uFuy8YdVMSBEFV/jDIlJHCA1SYrR0MfrOJz+xZ6iGE5pLd2MShx7DZrKLhYrhdtwKVjJYKEM1QANuhOBKh8fqKCKQVSAMg7w1d9kWd85DUbboaFVcV
85pFyHShAPOeCG7hTwtGFTbBuJPAxRjgbVAhSN4g20sd/QFh+V6QurVeBj8R+aAOCuWeGezVHGiIDTHeE+1ZkTZYBajSIP6cM/oaFRbh401QQmRJ2EfdzUxhoiodX8ad7g0eIRQdPPpULCeYEGtiCn+eFkCZ+p3J5WaCB2tZueGdB/58zaUaYXTrmcKclVZI2g5yoeFQ2E5+IBQBUR6IIiLIGcC63Q8BIe5KXetcGiOgyZFDQcDyGdaEIHuCmjFaIGLIAgRsEjMRXjZGWIGoyHhEmjiZXaPZIgUlIibHnjlGwic82YM2mEa+CSMfHh21IflmWdwcZgRbojyJ4inxFUY/4IR5nkKEmdYcYja4DfVPzVQuyjZAHe/xTkYVod4lUbCw1H2UYjKv4V16VaMwmbLLYgPiYWYlXH02gek0Iez3Gb19gITPJabAIg/5kjE6WjRbpaU6IP86oF89Ri+S3kUPwaFyWFIDGE0jpZ4OFKB0EkthoiBanlPkId+N4dgMQlv/kR5Zt9oiQaAJc9xUUICAbFWmuxhikuIf1uE7OV39p2ZaVuH8wt4b3k3hzGJA0QkxmaYt4E1sGRleXxX+lWJMcCVp9Z2zJRR139TI7uGL614ehUZeLKBUg1HSTaZaEGRwUJlfU1ZGQWVBQJpgbBI0feYriVx4rqZB6V26wBwFgyIsmCY59uYgQyD6J94tq52fHuZOHk3xbNiVs2T+OqHgN1yOD1U9yiIIyyG7gWDz/lZpTkH8waYbUIZujqT1o6QxJAUBiJZcsmYxluZzoYnZQsHRf6XvdgS6rCZtQOXXRhZa6yX0rZU/dYpfbNYpV9o7fNZj0eIb3qIaS6Zf85z7/v/Ze0YBZZNGbRVlhAvk+76GYfsiY0fgk6iebLXlxOnWhlalQJrpvt1ExvAFeLeOip+GhSxSjnFln8jMDs8CavpEzb+lcO2Bd6WQFYBQD1lkPcglAerEDNUBIPwpRr0g8kmVOR0IlXRKPz7dm5Yg8XpmBhjmW1MGXoNWjv/g+x0JKt0kchocFGjheDvAyLlBkzFI54bCVbaVxP+I6ETGTK3VOBBUCwdec7WaU1cNuZIEB6JQqKoot+RIaQ3oNLOCkHLpKseSTvIRQO4pb2ZBR5bSZ/KA/MzlndsqiWRoF9RQfCZFPPSQQ5GKJTJVg5bmOiXqlloSpi9pMeMqiEWEb/0mhRwNgXeEES+RkoHPJOx5FXnjJoHp5jzRJo4GYWRjaFGRqJ6cZkhV4qLO6afvXcyDKmb4iR64KpUspppk1rVyXdO4UDmVBnrPzAGAkiduaNRP0rgxUNxfKVF93ham2Jx3EbHuXdUhDr7YXPHaKPg/gp7IiWRlFrXT3gOpIqgmQAK/Hma+Ja4/zHtTTnsDZRhnEGfmXDhTkZw1Em2b5fRBQsUsAP7ajEIRZUM0TILzJQ6PqcE5hJ5kYqo2nQCkksgXwD2yYnYkIP94DQPhaHopzlMDHP+c3CxJwN/5WPEi1PtvzPjE7O5+UldbnX7B3lldAJkH0PzzkjU3Xs/nlWf9igwG0NbTnqK4KpDQ85JNtNLTdEzwhi3wYSo7YOLJilazIg4Z7iS0vYTUalJTjB7GOc7cfm68YGnUMKbRZRLSEq4/cCnpmWzeoJrJah7Bc2rFFaxZ1q4h8BXTHIi2iOzb5B6+eeUbp8lzdQjItkCW8dzWfi2swsANWIzXJoYMQiAM0ow6nGyAv8HSakp8BEjZclLAkezni4gGhC1DztlkwUGsCOlymWCmGF6raoqg+RHGy2x3U8ikb5yoEywumpwNTooVFZTFlsioidkKjK6bYmwFHQxoM1rrp87AZCrSRm0Cal6xUc7vJmbLB6w9XM4Ux4IgJiVNL8rrjgQDLixz/KTA97Cud16i6vrOc5uuM3ucAYvqpE7N5WqVzu3K0U6MA+OIByTojFkAmDFNc7tssM3GA39eklnUzCeAYKUJRkHWM/KFDD8Oeste3DZqGQSR0kdvAD3y3exW
7dDG7hRlfN0cuJ5ysYpO6CAm3y4I0tRCuCkytI3zBU7UwTfx+lskiZqwGm3jGarzGk8C4FaLEZ6C1a2xhbFzHdnwJmHjHerzHhpAPt4kJ+joHOskKrsjHhnzIhOAMUYrIjNzIZ4CZnxDIcuBVa7oKUdVoEurImrzJcaA8JMrJoBzK3SXCniDJotwFtTZ2AXzKrNzKPhM1z+nKsjzLmSKfm6BDnTjL/1tgL/QzNrr8y8AczMI8zMRczMZ8zMiczMq8zMzczM78zNAczdI8zdRczdZ8zdiczdq8zdzczd78zeAczuI8zuRczuZ8zuiczuq8zuzczu78zvAcz/I8z/Q8z206HEQphWLAuvXcz/78zwANB/c8BkuGZnD8F1YW0Aq90Azd0CBmGGtQ0JFTMmPAzw590Rid0RrNBpOIBxa90SAd0iI90jXUBx9N0iid0ipdzB48w3uTs/lWeMWlcB+8EfRzwGtFXP+yPJ0Su2BnwVEyizNgm7cG1Jo3Q6iREGTbiStMthQAAwm90lI91VSNCOWAPlAmOmSSb+B3sAfrkUPwQHpbQP9ioz757LwEd4tJW7kknGKKuqqbq0BFcbpex7SYF7VgWdV6vdd8zQcCmEcpEJ3YWaGqZZrxocOOtTt4eNZ4+ysMiC3M9DkSe9jrZSp4WH+SLRatqo7gSV8R8NTOoyEgACY5lEt9fdqondqC3KU0mLJvCZprlSxCqathiARFusOsKRPmldh+wxZNodtsBsZMJYhRrdrGfdzIbQWw7ZsveERp+buKGKZW4GK/gmJ/iJtSSWInjZssCsT+KWbJHd7iPd7YNlKLPND7doIc520V6cfvqZrhiI1kF7jSsd07mFAdl3zg6XBKM4/k/d8Abtzh08FG6zjkGrnFNYK/Jw7B59//tmiq3eF3rKg1gWrf5rHgMdOstlhjAd7hHj7VA45V2kuM46qh7xiN+cvDIqkE16mfMdjiSE3iGv7hNF7jfQ2uxznQr2LL/ZYj5EWVKv7eBOrjOyt1GBicKY7kNr7kTJ7SEo3gYmmCbFmE8KfTjY3BD6LedltueLmW+4qy7skYed3kZF7mGe3GUB6amxeg5zCVc7WZBi3Ygpo/RZ1HvB3WUsHmzjnjEFVuSGPmgB7oAb3CKXAmm12whdUU1do9CuKrUGQbim0nVm6S3IamjP6Z1kNKlg3nvIrpvzqL17VPLCzopF7q8Zyzs3Bju7SyHLBoNddtsI4scd01qcR7ZZ1Q/5O+LvSdQb3Bs/c767DzXG8NtyYg1nLL5zcKOBMx5qbe7M7+zR1gwuCbtet610GEjEIjwxBAwxF00+3w5C8oKs8y7fsyutyuZQk0vUD9AMFmWdtd7gvz1MX97PRe7/auCA927/q+7/yOCLzZi/0e8AI/8GqQAkOmkjNL8Aq/8Ay/HDVRa7ne8BI/8RSf7bV21BWf8Rq/8Rzf8R7/8SAf8iI/8iRf8iZ/8iif8iq/8izf8i7/8jAf8zI/8zRf8zZ/8zif8zq/8zzf8z7/80Af9EI/9ERf9EZ/9Eif9Eq/9Ezf9E7/9FAf9VI/9VRf9VZ/9Vif9Vq/9Vzf9V7/9WAf9v9iP/ZkX/Zmf/Zon/Zqv/Zs3/Zu//ZwH/dyP/d0X/d2f/d4n/d6v/d83/d+//eAH/iCP/iEX/iGf/iIn/iKv/iM3/iO//gqbXq+APmUb/SSn6mVn/lAf/msrfme//Kcz5SfP/o3H/qkf/rxDBOLLBRd6qDuwfmoH/vsPE48bgjDSGbh6TOXL/u8j85DAsKEcPvKOgWm3/vGL87jVOSLIPwPCoe7f/zQ7821hwjMP5bOL/nRn/3bXMgxbO7xxRtsYkhr8jAb13ojcNOIuO63Vja8l/6Ufv2mp/3yb80GwUyP2UDyaexlnYfy+jZiW5QhnkbHbpMVV6IA+fzzP//RvOz/Dsc9JyHnhq1Vu8W7f5RGh/UvIYvCEMOinpftVAR/DdBVYL2glAb78z//0izJgwzbu63Pbl6
qlLmU/mYXzl0EQE5psD//8y/N/4icyQbdj0SK/vRinDmz0t0qnKQhAM9Zzz//8x/NGgLwi2fe24bf7K3fC+7cbdnb3akuzbuflAb78z//0ozmdFLg8c6l4j51vF51Qi6Nat38YU5psD//8y/NciziPP2GGvrYacnFQ67WzR/mlAb78z//0rzbOQkbhzXkzI3iXIziCyqcMj5zzz//8x/NdvHJ1urlaq2bJOnmrAW5mnarwnldq18ExT//8x/NqfjEV9TmhRuar8nFxlW5lM1f/6///PM//9E8jIRu6IidRu8HRtZloyagpK/Jxdtl6DMwVOj6xwKr+9g///M/zf8IsKleVRCU7AGLH1y8jteuhM2biDP3/PM//9J83YyqvRs3FGVy7l1cMhYepS3dA4mHrrc5yISH/fM//9ScCwlfCflXyZQG+/M//9Q8EfW7CcSB2/CvNfM//9bcs/+JCSy0pQD5/PM//9T8AbnKCQgQmJz1/PM//xlf/PM//xlf/PM//xlf/PM//xlf/PM//8wcBAAh+QQFDQAEACyOAOwAHQAgAAAGjUCAcEgkBo7IpLLIHCqfyWYTSpUyqVBrEfvUGrlRwECJcXycYKRwDD0JxOnjOmDbmDKV23EFj8/5XwYCcXJwJEUucoQBf0UqRx6LjUSJjJKGRQYBgpdjdXcWenSDhHNtb51kZmh+mFKpgFOlrrKtnlawuLMFo6+7vbVpXqzCw33Fw5fJs8utzcheytGEQQAh+QQFDQADACycAOwAHQAgAAAGlUCAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6vUGrlR7xAcFgPIR7MQHVCf0UJbwFFkBVhrOEBOJ9rxb2Rxc3V3eYJ7hH6GgWCDfUN/h46JkEKSjVyPhYBsm4udeiSKkYyelUUTpnoVaUMUR6GIGwyKFBoFq7MiShIEupQCAi8xDDEyGzXAmmqnZs5i0F7SWtRW1lN6z2hBACH5BAUGAAcALKsA7AAdACAAAAapQIBwSCQGjsikssgcKp/JZhNKlTKpUGsR+xQyAhSBlLsEfMNjMtILtqrX5nb6zUZP34E6DcJInDxOeGwEXWx0cQEKGwIXEwEQYnh5cQZiQhcLbZJsL0UciYhqdUU6AQ6hZKNELo+oXKpDrBKuWLBCpSC0VXGdRD2gm5SWAB6ZFrpZiIrEjg+RgnGEmXDIhWctjgN/gYdahqLe36nh1WXhwefQ6d3e6O14QQAh+QQFDQAEACy5AOwAHQAgAAAGh0CAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6sUGpKcKzcKOHE43/B2fCymAmojO+CGi9n1uHOev4/7a3hEb3pDc3SDdoF/iYVCh4BygkOEflyRe5NClYuXjZZYRRVtAqWGfHIipqeam0gYAx2PqG4aC0cIs61akFqZjL66wMG9wQDFxLTJu1aHQQAh+QQFBgACACzIAOwAHQAgAAAGhECAcEgkBo7IpLLIHCqfyWZzAYVKmVXr1ZhdEj1HU7HrHYIDmzEZWYxoDGLuGr11zul1wJ3uKtvnQh0cg2FqgF+FcmttiX+LiHiOZIyRQnuUaYqTkHGSXZiGj2aNlnegmp+coZMVg4QBEq4WeqYIXR+0h3mXebmidby7pr3BwMPCusZzQQAh+QQFDQADACzWAOwAHQAgAAAGlECAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6vUGrlR7xAcFgPIR7MQHVCf0UXLIcYocFKbNXw4gpL0ZHxHMhkCABQgNoZsYwEuVowAKmmQezUBIVqRBwEEmnsgnZ+BAJc8o2BCkwV5U3sbdSKGV68kgxFCiIpvpEJ9T3+8qURyPnUSKK2RZstizV7PqFxu0ZW9zq/YZEEAIfkEBQ0AAgAs5QDsABwAIAAAAkSEjxjL3QmPmyxGSi3EUyfueApYiQZZmuhinmsLrAEs029b47e6izmPsgV1QyBJeCQmjSBkU/lkcpxTaFWKoWatWyylAAAh+QQFDQADACz
zAOwAHQAgAAAGl0CAcEgkBo7IpLLIHCqfyWYTSpUyqVBrEfsUOgI2K3cJ+Ial42gZLE4fvWy0OwA/T+f1TM3H0KSceGsiMUoqQnN0a30XABc3ATGHgV8MEUMXR4yIcCBbARUAm4JFGp8ComakpqhxQ6UtoZOtQq+xbnlEtax2tKuyvAC6vwK5vrejxbCiWpLHzLZpz83R0svM1lrYbc7Xc0EAIfkEBQYAJgAsAQHsAB0AIAAABqFAgHBILAaOyGSyyCQqn8imFAqVNqlPKxOr1Bq5Ue8QHBYDyEezEB1Qn9HFkYZRkCAEY7jQUniC8G9kQhEDAQcXQh8JASR4bEIEhkUfARiOeg0BFlsBiI8XRxIco6R0FYFgAHxcp48VaVqfsFaPApkdsXoKlRtMiLZ6Hn0Qm4khD2t6qoVPEJeCRCITwyWNydBmtdnKYtrd3F7e4eC52N9kQQAh+QQFBgAHACwQAewAHQAgAAAGqkCAcEgkBo7IpLLIHCqfyWYTSpUyqVBrEfvUGrlRrxAcFpOR4vE5kAas2YAIKmRgDG42k3MtDEEnGWpnQicyOHoCFhoBMwJufFI4R45vWgsBEY+DQhYnOXZJF5pkQi5IGhypR6KVFEcuG1+sfCOMRRGro2AAJwEyRS25lQS2RBDCfB+vjh4loLObfkcDR8GYulxDLnV3Fo/QpGmV4pBm5V7j5pvq4ey75GdBACH5BAUTAAQALB4B7AAdACAAAAaHQIBwSCwGjshkssgkKp/IphQKlTapTysTqxQakpwrNwo4cTjf8HZ8LKYCaiM74IaL2fW4c56/j/treERvekNzdIN2gX+JhUKHgHKCQ4R+XJF7k0KVi5eNllhFFW0CpYZ8ciKmp5qbSBgDHY+obhoLRwizrVqQWpmMvrrAwb3BAMXEtMm7VodBACH5BAUGAAQALC0B7AAdACAAAAaIQIBwSCQGjsikssgcKp/JZhNKlTKpUGsR+9QaudFhxOWagpFEytF8DqTX17Z7qGawzwAH1iCUu2UcgUeBgQd9cm92cW2Jd2CNi3h0cFuIk4qVjJeOXJCZkgICdZxYiRGRj18joV+aQyJIEw8gh66vEAxHMbWgXn5eTpbAv8AAxMPCvslax8ptQQAh+QQFDQADACw7AewAHQAgAAAGlkCAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6sUOgI2LXcJ+Iat42gZLE4HvGy0G36eztfhmo+hSTnvZjFKKkNub2t8FwAXNwExhYBHEUMXkkKGcCBGARWXkTJFGpwCAJh4oaOln6gtnml0rK5jsESiraqvp7WppmaxuLO6Q7ayXLTDvHdakLnLxVjOzMHRptTKztXY18uGQQAh+QQFDQACACxKAewAHQAgAAACRYSPGMvtCY+bLUaKLcRUJz55CliJBlmaKGOea9ACL9zOsV2/t56vu99D/YRB0tBYBB2VSc7S2cxEO9NQ9XFNiXAqXndVAAAh+QQFBgADACxYAewAHQAgAAAGlkCAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6vUGrlR7xAcFgPIR7MQHVCf0UJHwJap+RialBMOkItiSipjfHJ4FwAXNwExg2RxaUMXR4dvjn0BIF8Va4RzRRoBHwKVYI90RKAtnJZ+n6GrpZenQ6mwXKauqqS3srm2WLior7vAvcK6bMG0w8lqzWbPYtFe01rVVtdTfNBoQQAh+QQFBgAEACxnAewAHAAgAAAGo0CAcEgkBo7IZLLIHCqfyGYTCpUyqU9rEavUGrlRrxAcFpOP4vE5DTgHiKRbgaERbZxrQGTy1ETUZEISARMWAgIUDwEOgGACLYR3QxEMARdtayUBdkUcASmYgQOEHKWmGJuhjmAjqlwmR4ZWbpgtWrQ8AQq3ax1Hl0Ueh7QCgwM0khYKDHfEe1TNeUIpnnQSIsDEZtJe2t3cvIFp3uGO4+CzZ0EAIfkEBRQABQAsdAHsAB4AIAAABrhAgHBILAaOyKSyyCQqn8m
mFEqVNqlQKxP71Bq5Ue8QHBYDFuSjWYhOr8/pgEdtbcfhFKklgIkHACABKVIoAQd+AC+GUjOCiHsTYxgbAgIaATiIG2iUH0cVbAubdwATmAAsAwwyAJBwbqiCpQc7D4mLmiq4KoUZJ7KaF3w0csIflx6vZJVwJwZCMY0JobACJUcsQgpoINTLQi6fQi1HL95gQxV0oRbnXG+aZvFi8171Wvd1pPL79HFBACH5BAUNAAQALIMB7AAdACAAAAafQIBwSCwGjshkssgkKp/IphQKlTapTysTq9QauVHvEBwWA8hHsxAdUJ/RkaEFhFkMJjrKGD3s2P9HIHtkewoZQhEpMoNgiGlWbI4BF5B8QhoBBjCUW5YCFHVIIB9OnkIdM5hHIoxcUhcOR3FvhJUVa6adt7SNABYPKbMRJ4+8rr5JA0gtrVh9J6oxJ3rNVG6Rathm2mLcXt5a4JW122hBACH5BAUNAAsALAwA7ACUAUAAAAT/EMhJq7046827/2AojmRpnmiqrmzrvnAsz3Rt33iu73zv/8CgcEgsGo9I0mLJbDqf0Kh0Sq1ar9isdsvter/gsHhMrnrK6LR6zW673/A4+Syv2+/4vH7PX9L7gIGCg4SFan+GiYqLjI13iI6RkpOUkQImlZmam5yBkJ2goaKjVQNQn6SpqquTBFKorLGys4qwtLe4uWuYur2+v1wGTZcjwMbHyEwACwlTtsnQ0amXSwrCT8/S2tubEq8d3OHiostO2ePo6YYC5uDq7/CCCAvsCwftHPH6+3sFwyD8Agp8Y+qUu4EIE7Y5p7ChQywMH0qciG8DxYsYvyXZyLGjx48gyUOKHEmypMmTKFOqXMmypcuXMEMGmEmzZs2YOHPqlGmz58ydQIMKBeLT59CjSJPCKNpTqdOnUEEwtRm1qtWrU29e3cr1aFaaXcOKxfn159izaFGWDZC2rVuQa9/KnWskLt27eHnYzcu3r4y9fgMLRgF4sOHDHgojXsyYguLGkA8/jkzZ7+TKmO9ezszZ7ebOoMd+Dk166+jSqKGeTs0a6erWsIG+jk075uzauFnezs375O7ewEX+Dk684/DiyI8cT85cyPLm0HtEAAAh+QQFfgA+ACwMAAwBMQJgAAAG/0CAcEgsGo/IpHLJbDqf0Kh0Sq1ar9isdsvter/gsHhMLpvP6LR6zW673/C4fE6v2+/4vH7P7/v/gIGCg4SFhoeIiYqLjGwxARVHBgGUARxICpWUTZkyjVYvk5QvRx6beiABLmSmCxufeBWUHkUnlkUVIBgLGA8yFlYtIsCCmpoQZo+RRgccHJOXRzbOD6dLtp6wUiOVMTEwpdZ4qaRjra/adbIBtES2Ie7Gle1TO6qD8pTIZc/ESCm3lFwQl0SGARLpojAIoGJJBA0GTKBasMocRAEJ6ayjJ+TdEBiUalB4ZUFHAY5RyOFjl6GlyzkAoyUZGCCjF5oSEam0yZPIxv9aAQFM0tHFXrlAs/LEdEiwpxUKTQntdMrzZzx4Qij5e5qvUgcjLiYsnGAD3RAODCmEKFCAx9YhHgiIMnDgA5WkV2yoyvQtgz0Dy4RYMBZYWlAkOJMs0JStVI1HAzi0YHZPLgMNKfKu5QXCLi5dnEcWWUhB7YDLbx0szncUgIvVUc/egyE2MjCoHFac9sRtQGvBqi8ruCCt9vAD96AsNTKpoZDXmsxSRuG6dghiUBvgOiykA9rLElxECLYZww7PQyqwqB3jhGiw7HmAM0LhBGQHb4XEnXvCs9UhHmUFSRYXUHNaDNRIVoRYxjwgHVqZEFYETbwksNpdLEm3jTzfiaP/ljOkKbHcTLEJYWBy9LFlDAFAGWSMc1N0eBpe3eXzFRFsobAQbMQNIQM1+lBzY3cn5nTEWijYwhgA2WnCgyY4xCPPWwQ0uFZFUCzWY3rieAeiK0kgqeRqIwi22HYaAKXPjvPFCNtq7w1mI3Or7UJjjfIgRASFKp7yX0dB7SKTFlNRVsAyH7DFomyW9Ij
cSVdt+cEBGPL3nnK8tKTBKWIVNoQyInKXYk1MdIIEND2+sBgNUt5AXIEBxEBFcAigUwEE79F0gkuPcrTjcAJYsMuuozJxTpiWUDQSDi6IwKRWAigZSZWNyQAtABHYkoBZiSa3DopPXFmECKI+CyYSa63F/8KyzZrJAJp7jpLTBSIMmVKstRIZZwgdjAfoAlu6RmNJAa+DpaoZAupqepSai1KAAofUgr9XGIXYqj5pySjFJuyipxC7oHdFVzA+oRexLARkLXV0elrEiBeTeg24XDLAUZUTSEkxTQE7sU5+ZqWcJo7gjoURnuG8y9S5R8pbLADdmgmCfhoTAQGKEGZMcxMwC8VQ0kYa+jG8GQ9dY85Z/CyFuABuvSDNJjUM8oBH/AkAxBF3k5kVFtfdVAN0A4CWA+Me1hyhFdASgQoz9syEXmUKzEKNzrYc6qBgl+p2JoRr3ZLCowX+BOdL5AD3YTl6/rRAJQoe69FkDzY0bs8tgP+55FSL3rHbS2+Z2OpNyxoz2WfDzne5mi8KKLHDE+91vvTdebeoKhxgp+0aQtE3WKLegDVFL7/edgHDhE2golFATvn6lifRNfAFuS3B1te63tjcIjux6W9GLPbWOkYKEVyikh1jtU5dF+uBmRz0LAamohcJekZAsvM52fCvCWwTWOfg97YakEg7ZYuXJSZ2hf1xbX5vYh7UJJa9iEXwS0aqEvkuhTSgYMUxi5ncrHhXpam1yILbURqj9LEC84FCfCZLDgmU6DZQuQ95A5SZErAhCfotYDKAKovVdNeE07Dqgz2j4BAEmLsK1syATJtOAhdINbShsCv7AOAPpTAisZT/TIQtNBHvzOW8vMUKWFPwokMggyBnPEJu/9LH/fyYjwc5qYjuehgUayfEtfFQVKnQoetYdkZcrGc1t6sC46TINSZmAyBEad8/JsnH5FWRk2MMnGq0CESFcBGPQawgW2hIwNaVEVmwjFc0aNdAPV5QdfHAki3H8zsObjGYuOSS2eBCAtDUL0u3jIcGLkUuH+5JBJuJFcW6+QT1VEMfGBHjVZagTkse01oSoFPl7JfL+GWTjr7kXkV0c8om3lODS+OE/Kzoj1m+DYtQMKES/FdP/EXzoc1DF++ISVHs0ZN1pLyoOwEa0aZBs5MfsSgShIXEhO5RQDTsYagCoDwdTXN0/5SYDM9syM6qbdRvBTDe7oaUNSJw44YSjRwWhJY+8OHuqAe9XEBdSZkNRrKWUIUpUF9ZuIjEkpcZbSVGjRhVYbKxmAKz6kJ1h74pMO4S1bgjRJPa0bDWNKuam+qEmgKNJaiKgQ5zU+QAB8lYfix/gBqATmGqvLmKrgM0+g7HPLaEDD6lrEms3BL7mUq2jjRTS7BpEmbgthbciS/JtGw5r9nUDhINq2b85VaBuUawDgRtrYDenmQzA60p05btdGZXQ1cYC71Ut291GToicCfPsvI18fwUS0vR2OSkjAF7U4s4vPEC0dQnh6IsEV8QBVnXNepfZvzGlhAGtMb251W6yf+nPtmHyioKl2jzlKhc1VjFCaQqpqF9ppsKAKNb5SqHvLrTLteqVTCy1m/DtOhrfeQ0bJELPd3S4reOicEKORW4m1TC/LaJreBM8wXnsZVY5lvhBvRXAu8ZcTtGeRgQrCsnEdYa8xanAaEKRTxH04ss56TcfJD4CTMyAAQe4CkG7UikeoxQnxAqIErMyKhRMLKEirpPU+onQWMRkmEqhAF71axCmrzbiQrJAQWIMIVz3K3+8nEpFi+ZtwTObXqwbAlqpGaiE8wz2hLZJ9HJcEdCPqldV6PWW43ZzqKN3puW+OF8DMBx4WLzl1XkixZ3A9Aa0hFriLamkLzsnO1xHAz/HEDIEnh5ChewHqHhMxb+oSUFFpDgdegzggfYyS1UMOetZcCxyLI3OXySR2U/veq6vVGF1+PQhD7Zli/mV80+Kw9+6AMa89DQa6jVLUC6ojvHetW1SIZatX3hMgRcbQC1LZQULsQ9buf
P20b4gHE84NlpLm4HcxlBr+ko7fyZRjgbeI03HSzBBviCq6k2XQE0UBfp6JozgDXEq6lC8Ypb/OIYj/TeMs7xjnv844yYOMhHTvKSm5wOIj+5ylfO8pYTStAuj7nMZ07zmtv85jjPuc53zvOe+/znQA+60IdO9KIb/ehIT7rSl870pjv96VCPutSnTvWqW/3qWM+61rfO//Wue/3rYA+72MdO9rKb/exoT7va1872trv97XCPu9znTve62/3ueM+73vfO9777/e+AD7zgB0/4whv+8IhPvOIXz/jGO/7xkI+85CdP+cpb/vKYz7zmN8/5znv+86APvehHT/rSm/706+Z2dFDPeqeofvWtj31CXl8J2dteG7RX7+13/4fcw5X3d0cLpA0hZ4mW1wm+B37fZYFIRRQfwVZIvvL33qlPPL9pEUd+7qevd1nglRHXjzcrx0p77ucdOWNbRPj7BzAMld/8d1/Ivhm262uL+0CA9IllQoMEjdUaPChhAarWbGZRQC1wTgcwf1nkfq8Hf3YXNat0aZgkaf+T9mbsR1JQ4lM8Zi7pVSd59D7a934OOHc/pRjLpR8psEjW0mWvkC0lhQP8Mk7SAwCLcSUX4AECGBgIw2vAQUs44BWvsG1qpVq2JIIjGHcpZ1h5lDtRol8aZkXt138nlR1hhhwqBGepZ4RH+HaHkzTDVzvJpYFXyD2FJSBmFoFLmFchdQMmyGRAtn1bGHepcyq8gALD90AvVFc+9UaV0Hw0eE8qtVrSlASPsHHYpIVxyHYLkX1MYiF9+G6qh1ceQEgPYEgneFrIcoYf1EdOWIQNmIhceEV2pQCiAC49AGXIwmFreIGZ+FacqFxDyATSB4ptZ0dOACtMk0m9Q1Oh84X/NRBu8FNvbRhIcEiLbGeF2KSDmGVgWzRwKPVEkLKJIeSFxIiIxnh2LpWMUWSIcMGEMsZ+X5gBCxFfwYg8EJiFn3iNavd8sZYC/uKC0UhJWEIvAxAYKoYnoqJZ7caDHfYxuSWMhuGHIZiO6ph2kyBcctJpoqgmU1KBlkBOvThob4Ii/4g81VeNBFmQ2HiJL0OKKuIeIzVuIvEZbHEQAZePUagEArgLvQBLFflbroWOqqeRakdc8Yhxr2FjnjiTNJl23PBRFHcaqXWIGdmTZbdwg0UVOnJbO8ltRpl2HyAC4WgTKZiGsliMT5mVI4OVWtmVMumUXhmWTdkVYlmWA8mTBWZpdUEAACH5BAUNAAMALI4ATAEdACAAAAZ6QIBwSCQGjsikssgcKp/JZhNKlTKpUGsR+9QauVGACwsBgMNj6sN8PgrHmIx8Pm+7xYGY1R545/d2fnpSfIKAbX5VbIh4ioV4enRzi2eGhIECcIeVkJtgllOYmpeMo6Glf6ScpleYXpSfr7BcsrNYtY+vuV67Wr2eXEEAIfkEBQ0AAwAsnABMAR0AIAAABoBAgHBILAaOyGSyyCQqn8imFAqVNhdUpZWZ1RZVgRu3G/2Gx+QA82M4oMlbY1odH87pgFBS8u4ORxwcBmdyaWuETnOHYoVwZoyJho99WYuUVJaNfpOalZyRjkRgkHaKn6WSLoGCR6scGwB3QiebsnVCtre5dbtxvVu/VsFTprpzQQAh+QQFBgAGACyrAEwBHQAgAAAGjkCAcEgkBo7IpLLIHCqfyWYTSpUyqVBrEfvUGrlRrxAcFpOR4vE5kAas2e4AheIYNCQW51qNYigvameBBBkmFhgBJ4FkahNEfRiLYHxEFEcCcYJxFUQRlwJvahRbcKGgcqSFpg2oX6p7m6mZjHGjrrOTsberrXqvmn62vrhcbaZmsMiayrTMuc7Facdeb0EAIfkEBQYACQAsuQBMAR0AIAAABqhAgHBILAaOyGSyyCQqn8imFAqVNqlPKxOr1IICLyF3af2GAeNoGSxOB7xstBt+dr+HqgljoIiYBXJpQzVKBiFxdkIfRyQmABUFjG2CABwBJ4CKkoFjbRZGiHMURxmZQhKhgpB
3RA6pnaump69cj6SyqHVzGwsBFaC6lJaYeJuJtnGrtFiDhYcuk514EHszGX+ctVpOu9vR2t7ZzOHiVOTlVeTH4evedkEAIfkEBSEAAgAsyABMAR0AIAAAAkWEjxjL7QmPmy1Gii3EVCc+eQpYiQZZmihjnmvQAi/czrFdv7eer7vfQ/2EQdLQWAQdlUnO0tnMRDvTUPVxTYlwKl53VQAAIfkEBTUACgAs1gBMASsAIAAABsNAgHBILBqPgIByyWQin1Bhc7qMWo1U6nUrzTa5W+8XbBU7yVFzFQ1VK9ltN/zpDgxpBw2jYNBF0HVCL1kUZIEAJDMVG0IXIAE5AmCHSAUBhVyURxIBHZNyjToTlkwIn2pCFXsLBhIcHJYop2ZdB39DsLKZcqoFJkUGnbNiiAE3RRELwruoLZCSQyxKMMNeAMkBBIwZMku6YaCDTSy51Vl3D3sPKgAhy+Coc0ea8kP09Umg+Pb6+/nx/v7RCiiQGME6QQAAIfkEBQ0ABwAs8wBMAR0AIAAABqFAgHBIJAaOyKSyyBwqn8lmE0qVMqlQaxH71Bq5Ua8QHBaTkeLxOZAGrNnDDqewwEheEfV5SEBi6AEoemRCF0cwJoUiHYNgQikBE1Zvj5ECUgtrhQyRLXlbmpV+ChROoYogCUgVjVxSFhqWbqdMFEcbs3uvt7mEAC8gH7gAFBOylL9+nLy9jgARMKmcGiMZprpmtFrI2dhe3N/ak+KY5FNrQQAh+QQFDQAIACwBAUwBHQAgAAAGjUCAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6vUGrlRIs1luYKPRVDgZT6n14LtOfBmy90Ubkc4p184gAMBMYCAFXxzdW1gineMRDtwjlyNX26QkpaPQ5F2mpSYnk6JoYugnAEEplhFKGiTrJYGDxCHAH1MFwcYC0d7t6ReiJfCwMTCuMXGm8jBzcdeycV9QQAh+QQFDQAGACwQAUwBHQAgAAAGokCAcEgkBo7IpLLIHCqfyWYTSpUyqVBrEfvUGrlRrxAcFpOR4vE5kAas2UPXpNGYvEzO9XCufGzUZ0IzRxUCAhUFAQSAZAARCwEtRB1HGW56lAV4X5JvADIBHIZEDwEil4EEoUwOiqiNqiCsrp6gErOnnpmjeZ16j5GTla9ggoSGiK7EXHtPEH/LWEQvfHbQ0VVpntp63IHejeDF4szk0uZUQQAh+QQFDQADACweAUwBHQAgAAAGiECAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6vUGrlR7xAcFgPIR7MQHVCf0URX4ACocAqFSepNjs9TSg98YH4SDCEVERQpIINcfnMCkk5wQy9HF1eVQpcaU5sAcg6ffZaRmqWcp1ugoqSEpo2osKqyrKmhq1+4rrOPamxuwcCgXsNmx2LJxsVabEEAIfkEBRQABAAsLQFMAR0AIAAABq1AgHBIJAaOyKSyyBwqn8lmE0qVMqlQaxH71Bq5Ua8QHBaTkeLxOZAGLNYATYDEpBwja7YtMGHK+G5weAEXRTFzgWdCJYhDFneJZEItgEMKAQpqimoWTgGdkWCWmJOVoVyORxsAII2nWERyLRkMhJ6bQi+YKqavVUODE66+WUQOaF+4paTJklufz8oAFarRzkPHLFdwQx2Q1qIZh0c1U4JHGCIC6+CoaXlt8O9rQQAh+QQFIQACACw7AUwBHQAgAAACRYSPGcvdCZGbLFpKbcRTQ+54CliJB1maALqoBhu4KyvDNe3aOa7qPW/yBYEiYZHoMSaRGmWTeYFupB/qCHXD7rQ/7hBVAAAh+QQFBgAHACxKAUwBHQAgAAAGrkCAcEgkBo7IpLLIHCqfyWYTSpUyqVBrEfvUGrlRrxAcFpOR4vE5kAas2QCKSIJZJHit71l9HCyOCgJ8ZEITKxeCJjQNASqDYFYvASWPXFYVAQaVWEMdPHVJGJtVJiF9ExypmaNZNkcfRBarbmsAD41FLbNvAnUVRQS7tRq4Qx6MBaxdLJkWQhY
GBcJ7EaDIAwjThHEKdRgKHrIDyktpvOa16Hvq22bp7uvw7V5vQQAh+QQFDQAEACxYAUwBHQAgAAAGnkCAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6sUhgIkLXcJKIHF46P3bE2ry+ypex1uz+EkyrcQsjjvXwoMSh1DbgFrAQ8XiRFCh41EBmyQcA5FIoqPgHFCH2+VX3VDFqCcNkWliAChnQCfBQKsnCGYmrNpkUOTL5u5cIqMGSdvuGNrgoSGp3qDfbK+x1p/v9PRXNbL1daV2d3cd97h4GlBACH5BAUGAAcALGcBTAEcACAAAAaiQIBwSCQGjshkssgcKp/IZhMKlTKpT2sRq9QauVGvEBwWk4/i8TkNOAeGEUKOMeC0vmQhpfAkONcmBgETF0IvR3dtay1HHkQKg2p5kA5FewERink3ASJXARaaYACCKJ93bqSdp6Jcqp5boK1YAJSWjbNUAIwLjkOQE5KjgYOFJjaIwq4Al0oKf3lCcTELBXZ4w2aA2tHc2V6paeHey+S04mdBACH5BAUNAAQALHUBTAEdACAAAAaNQIBwSCQGjsikssgcKp/JZhNKlTKpUGsR+9QauVGvEBwWk5Hi8TmQBqzZ7gBF8M3Ez+p5/U7Ob+FvfnuBcXpOgGuCh3aEDHJ/jImFkHxgigAXR5F4ABgBH0QfmpVcQhIBI0QOo4QiR6AbLgWskhEDSiq0nJg8DQUhFhm6fWmEZpLHu17Gy8jNylrM0WtBACH5BAUNAAgALIMBTAEdACAAAAaNQIBwSCwGjshkssgkKp/IphQKlTapTysTq9QauVEizWW5go9FUOBlPqfXgu058GbL3RRuRzinXziAAwExgIAVfHN1bWCKd4xEO3COXI1fbpCSlo9DkXaalJieTomhi6CcAQSmWEUoaJOslgYPEIcAfUwXBxgLR3u3pF6Il8LAxMK4xcabyMHNx17JxX1BACH5BAUNAAQALJIBTAErACAAAAbPQIBwSCwaj4CAcslkIp9QYXO6jFqNVOp1K802uVvvF2wVO8lRcxUNVSvZbTf86Q5sTwHRUxZQ1LcpAQ5PIQsqf1cUAQl0AReIVwWOR48FApBWHAEfQiIDLkItgklyW3x6ABMLEp15pGpcH6MZCzELG6mbr2Zgb6KHFaQZu2Jgth4sGJcjlV2wXHgfBgoAJxOiM868XCh9uoczATDaxVyKDLcAHksU5F5k6BNDGm/uWWQSCzJDI6tDmHOIAAxoTwvBIgMJJgy4cE5DOA/Z1AkCACH5BAUNAAIALK8BTAEdACAAAAaEQIBwSCQGjsikssgcKp/JZnMBhUqZVevVmF0SPUdTsesdggObMRlZjGgMYu4avXXO6XXAne4q2+dCHRyDYWqAX4Vya22Jf4uIeI5kjJFCe5RpipOQcZJdmIaPZo2Wd6Can5yhkxWDhAESrhZ6pghdH7SHeZd5uaJ1vLumvcHAw8K6xnNBACH5BAUGAAIALL0BTAEdACAAAAaIQIBwSCwGjshkssgkKp/IphQKlTapTysTqwRElK4rN5rhmAeB8HZ8LB7SYrYbvpYT36L4eK42sgN8elyBdXt3dH52Q3iCWISJhouITn+PlIpCjIWDhzKNVEUiAQ6fVUQWRxWbjnNHGAkeQn+ATCwYSBeylVqXkbwAs79DwcLAu8LEyMe/ycx/QQAh+QQFIQADACzMAUwBHAAgAAAGqkCAcEgkBo7IZLLIHCqfyGYTCpUyqU9rEau0ogIhIXcp/YYB42gZLE4HvGy0G352vwUAS6jQCOHMbWlCLQtKCnF2ABF8BxmKE0d1cy4BMSZDHgyIcw8BIkU8m4INAS1FMqJjJkcWp6lcqwGtRCSvWHKmtLZUADeeoLtVlAaXQpnBWYsBJ44XEJGBqgCEhshdQnqkfoByglpOc9/RsOLd0uKJ5eno4eze7mNBACH5BAUGAAIALNoBTAEdACAAAAaJQIBwSCQGjsikssgcKp/JZhNKlTKpUGsR+wR
ElK4rN5rhmAeB8HaMLB7SYnbADV/L6aI4G68f8+17RG9qRnJzgnWFd4iEToZ/ioFDg31ckI6Lk4mYkkIzm0KGh0MiAQ6VWEUWRxWAfnRHGAkeAKJNLBhIF7WPWpyvvryZvrbBwp1axcHKxL3LhkEAIfkEBQ0AAwAs6AFMAR0AIAAABoBAgHBILAaOyGSyyCQqn8imFAqVNhdUpZWZ1RZVgRu3G/2Gx+QA82M4oMlbY1odH87pgFBS8u4ORxwcBmdyaWuETnOHYoVwZoyJho99WYuUVJaNfpOalZyRjkRgkHaKn6WSLoGCR6scGwB3QiebsnVCtre5dbtxvVu/VsFTprpzQQAh+QQFBgAFACz3AUwBHQAgAAAGikCAcEgkBo7IpLLIHCqfyWYTSpUyqVCrwIhdMgkxpAXQ9RIhR0wsRiGXkUXL8VJ8w4mpAOdqDxRJenx2QgxYbX1+AA4cHAZHjIx0iH+BdX2UPIJvmJplnJaDeJVcoUOAJZ1dn6SbopmgraajTpeuqVirtKVCp7dVWrqxwJPAhLXFxMjHw8taycx2QQAh+QQFDQAmACwFAkwBHQAgAAAGoUCAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6vUGrlR7xAcFgPIR7MQHVCf0cWRhlGQIARjuNBSeILwb2RCEQMBBxdCHwkBJHhsQgSGRR8BGI56DQEWWwGIjxdHEhyjpHQVgWAAfFynjxVpWp+wVo8CmR2xegqVG0yItnoefRCbiSEPa3qqhU8Ql4JEIhPDJY3J0Ga12cpi2t3cXt7h4LnY32RBACH5BAUGAAYALAwATAEWAkAAAAb/QIBwSCwaj8ikcslsOp/QqHRKrVqv2Kx2y+16v+CweEwum8/otHrNbrvf8Lh8Tq/b7/i8fs/v+/+AeAaDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6CZTqGkpaanqKmqq6ytrq+bo7CztLW2t7i5uruiTby/wMHCw8TFubLGycrLzM3Ov8jP0tPU1dbXh9HY29zd3t+dEYQN2b7g5+jp6ukFC+VM6/Hy8/TGJgYDATZQ9f3+/wBNCSBiSFvAgwgTKlQkoKC5hRAjSkRocKLFixi5DXyYsaPHj8migBxJsuQrLCZTqlyJSQiDRhVZypxJcxABRA0N8KvJs+fM/weDchaK6bOoUY8Bkj7YebSp04wOhnJ8SrVqRKJWs2qdh3Wr16/euoIdS1aa2LJo0xI7q7atW1yB4sqdS7eu3bt48+rdy7evX5dJAwsW/Lew4cOIE2cZzDiw4seQI0v+27jx5MuYM2u+U5nx5s+gQ4v20nnw6NOoU6suUprw6tewY0du7Vi27du48dJOmru37996dgcATry4cTXCjytfzlxL8ubQo0tP8ny69evMq2Pfzr239u7gw6v+Lr68ec3kz6tfnzg9+/fw9bqPT79+oPn28+vnvHu////80QbggAS6gV+BCCa4xYEKNuhgFAw+KOGER0RI4YUTWojhhgpqyB7hhwN6COKI+olI4onxmYjiiuqpyOKL4bkI44zYBQEAIfkEBYUAPwAsDABsAYEEgAAABv9AgHBILBqPyKRyyWw6n9CodEqtWq/YrHbL7Xq/4LB4TC6bz+i0es1uu9/wuHxOr9vv+Lx+z+/7/4CBgoOEhYaHiImKi4yNjo+QkZKTlJWWl5iZmpucnZ6foKGio6SlpqeoqaqrQzYBDpgKATJyHQG3txaCOLcmlh69rIwqr8JSxA9MFA4DC8FGJ7NasrR/Ebi3IpYKGM4BOFXACxtszbpjyIEWz6ok2O/YQq6wZ7LSRMw1VDgiHU3R1eBU4EDQ2bl98AzMoCCGVwABW0RMKMBAAwgVGeaI82UsUTozFPpV+pjk2q0YKJFQm3bvD0EOzVxUgnASJUMqG8t9I0PSz7r/ApdeiIiwxIILf0RWwEsor1i9k+SG5KPijt4SGQZI2Emw4OCUC0tvwQjzk+MVCt3gndBowIDZjoY+WDx
TdaTCqwE0ZFSCVWuWvoQOLJA56efNKxHaQlxjjudcQBTYTaIIju+CEEYyam4gTfPeeWjsBcAotWWUuoW4epUCdqcJDx3SHu7i8G2VGHlbEIUtQQHc33iUlhgFEK5gwpJSXFbU+FRtS5SXCF9CccUR0E9xJytNgOry1K4RhgfQuvuX51dsFfCQGbh7OtNFgTDN6jgl1IiiO5csSX8S/EhwZp0RoL1gQEUjRFXEBzw0E4MC7Ekhyw4PhDdVKwdoQNFdRTAQ/xZ93mRTUg24FcBBC0ZwRsEyzXCwmgAVkOjgCbN1OB5ruRQRQl5JZciZBgkeUdYFJ3TDIXmSSeDUEyMsycQIGjYgQQoB7gTlAj4cwJ5g+hRhUoQAuBDiQ0pY4KOJVBJYIYIKOrGSSvdEgEIJBw7wgA22NQUCAANtOBaSDfCp5ABaXgcTlncSNUUHZx654EQYQDgfcuTNWecNeBqxIy43VLkiMxW9KIAKh2IwpaJCeBiWb18RcOkJH3i5QgiX+tUeC1FOMFSKVtZZqBCb3tKpEZF9aCMuASFh4IYE1Dhmsq0Us6wP0AoRQZGhtlbFtDGUkCY0gylxbYknEoGbqMCOlv/UmkC2OYQzF2RwZZZgOqHqUpVFu2efaEYbTxIsIrpasYJy9msRWLHZ3BBmGqlrjU8c6sx2vH7aIrrKSOQwil/1IhGW1QrB4IzxLugqyZ7KWyKNUYQkwcYIfxgAqveqxeuAMTswX4gzgDsmAxgnMSECLV0YZliz6VzqSxy8oGNB9DFM0bM2KjVmySKHRVrFVrRWoyw5+PszxIZNjUtU6G0AkwTuPukkwGZ7s2fFE2FTAZ88FoGCkx28xB8RTcJjK3l13+sWFMphhsSOpAVr9gR7JfUK0dhsV6zVnLpLAC5pqSvFC0jHHKIBDVLa4M+QI0zQgRRXXTMDWAuxeU0iSrX/urAvZdry497ouFTqC77+HREGFZ6j7LcP66XfeTH99KEhC4EWU/jcHmTOO+OyFhGtYRM4FcZ7E3tplBYx/TvmAWCfp9aGjyWq734T/t1QKH3pS+ODftE7FPcNdZ5NERz3xMI/26xpaj7yinomdou5SSh5Vdrb1abwswYuBgoOEczoNIcvIvzkJLjYGvHk9w76PUF4fGPeBJxnu6Xl7mbXOQn9Zhc7GeSGHNfKGxQmtAHORMVoJFgI2uYTNgINTwkziJoQWFcy0HkuVc4wjwW6sT3phaADqBIBvI6gmq4drzTAC6IJiaRDqZkoKx64QAvYljUyqe2IT6BcrMRFkV/J/yVcNmJcvGJEP/+ky2mZcQYAjzYYRS1DdwKo0ATOsYy3FaWMNirZCWSQryk6EnRKumIEKJCCuRWrASgghxwH+MQLiORzChgjES9YAWfs6oOmoyRHLMkqI7ZuhAxgFS1JGUprnTJnWFjlEBhUhElWUkO1bKMDsCYDB8ZPjx4w0xwld0twARKJSoSiBkyYgsNZM4YqgggN8fG+dP2NCRI8iJnG98ckMDGAHAMQKcHIyApZ5ZkFeMEeWbYoOOaMAZrkpDOtdc5W4nEDTjzMJ3s5Sn/RD3NeaUYt5RS9ODoSirnMyC6loCustUBAOKqdQa9HyK1FAwMKwoEm/cXOieVzn/8mdEJHF/PRBYgwfvlSlj+rBs5pmqAb3zLJair0LTcVY3ONW0D6LjhCiMVnCUkMWU3rBbYRAu9oKF2CHrkWji8CaqlcvNEHIxc8MoGqbfaaGAnQtbl74i2ruIRYO5vannNmoDpKqGmbvgQFH9YVKHADrOR2+lVyVvFoEBCDf3Z0ACLU9JpIKEsMlZdHycEVsU6Q5xS64VMnSBaMTRAfOi9KTsjCKXpaTABZsUlSh44QAWacTd2mUJx/ZHOqgMsbL7wZPHrQIC9k5WtctaDZwQ6UqQR1YxE0NLijNVZ6tUvXYT0kwtkdRFtd6IlVLUtc0pbJgv6KinCHMJGiHmGrw+3/gv7CmtPJUueJ3C2tvox4WNvuAEnbMdr
i4Ksn24aMh6Sk2UHfOsj5clU8N5FTieQqXw+KBW4PESZOJAZeukayspEtY+KQkBMjCna/2vCdebXqOS0S5qOKS8IWB8tO6I7nqQQOww2eaBCevhd+BoahGc3y2xggV6c8wMKBYCuFFQOqvVVK5n+8O1fWRpBJSnTHAJg6qWjd0hXjkBCTGwwut6LHQ3uByTlYcA/9NhmKPetukJe8gBZ7SZDE4s9nixW7pxq0TQ657i3cXAXtxq+XLi4wBiHZhDzb6KH+FFN96curNNOG0ADwo3uVgFdbAjNdpmLaodgI5fsCoEIM0a8p/6OEjREX95uailqNoRjT8cpjUNhYdKRvtDuteUkH7jPvZ4XUQGdwrGsJC+EQWiMBTffAQ6vxNaUtlM0OX9qdzdN0N0Js1BBDqinU5hO23pE/SAfasWUUx0oRg6u4eY7Ovitfw17XYj+zOsDD9tC4l7xmK8xOBAy2Yomw4VG72lg6W95BNrkM72pPegibYwF3ytrPWcT0tCDm6ozp58p0nWNG0gYRf61w6qboRRl/k6Cx1zRLyXw0jIRe9RLzAgM+H8PbfnR1HE+Hjct6lj8TASQRRw7JacVax+mhOefYC/AUP5nRuVW4OcNyVftiW30tMSiirId0qCpxyLz6NbILTv+4mtzuuRfuKv+E6GDcUb3s3635ajleJ0PK7EYqP+9Bjfxm5Rr2vR9q7SPXjKUs78iEWjT7obrtVrSD++Niy0uzxDP1Q/XyzukebEWY126Yw93uyuFcKoGchQMCFGdiswjU+s3bZef78CQ27RGiWteZQTlk2EF40ehDMNpWrqLrMwLWDy07aXygeVCMyttbG/f0MFnRTXB24peCNpPr0IY9ODCgtNfZflLWwsldOxOCtcL/1TrV1aAw9boe7UOBHafB9Lr3w07v94I+x71X+qcHHgUA93DKRpMo/GACaPgL7eobV2mz5izKdVZJR3Rid3r3BwL7x1+7FmfAdSCyFg7/lQZ5oUVrqAcA7vZt4PdeQdNXU/ZRIjhrnhFdUER4EBY8iDdss8IVXnVCb8N/HEg+ZlQ+e3Zw7HeCsgICLgg0OGgFMXJAHMBKJigARrY3padiGFh1EWd1IYNuBseEzkWDXNcyGgMVclc+5BWAnoNiR+MOGxAZwPOCSuhyp3F8WzaD/sdrgZKBmJWD8rBtA2Z97DNC2mcZBeBTD1hocNZgSqJ6qWYAs6FFx0V3U4BletiH2Md5y/Z+/XWAZ2ZvS4JUF2KBW/gn1ORfWZh16GeHnSh7xzWAXrSEajh//RdjGeZ6P/FrnXcPUMgEhsg1JTBwygeK78WKVoAbHkAiRsgC/2DhTU3iVjK3XqmogmUSJVJgiSvXS6/YZMFYd5VXTe/miZGFjFL4Fw/2iPEWHnp1gWaojU3oZIFEiqj2bPG3cIZnfEq1iexoYcDgYxYBFh9ANOnTDLhYhhFBWOCYgpb2SB+Giq6wgtPYHqCCY2d4fZ+og/VDH+oRUu6Sc+fogeaokFVAVAwnfT8IdJAoe/KHST9me56GXxeyYbx0jaunRPbnYgJGgCWYZeZyUecyiqdHNNdnEqe4h+n4PVvAWH+WVmZYHLG4jWhFDP/YgVkQDWvkG0XihRE5TDe4kRBojGmHVnkFSTaZkEZ4ULXllBi4geFEjcVokukXELnXRlgjDv8fSJFViZCR54RHp4lSSGYd2XOKyHE7xXon6WXs0A3yGGksgFToCIvkeJCWEYpRCYH1woZSiYpmKZF9ZnksGQWcJTohlWyu4ZF0ZHfzV4iDyQSTmVtThpH9qISOOIUbKQ5E5iX15ySFYx7dKHsbt4HFpES4JXtX9ZXZl5tcGYp/KJNVaXNk5oD+ljaswxrs1BqAtjlJSB6iqYK2II1CmWRV6YOqeYayQAMaqFRP1Fa+cyPIF5ZOKZCHiTiQVAPRxZNceU3c+ZJdCZlweJGLyQU7Qm3ruYXjkUkf+Z4Z2Zbi6GEGCXFxGXVPaU5qtp/Z6ZK9d1X
zEQ1EwQ0z1m/UyT3/yBWU3mF0YgmNYSVrZ+l8KwhSsPmBW/eY0ImbatkEzEVKRQlypkGU4qVs7XFkZceZ37gEJ7qNoblcNlV07veDcgmayDFq1Wd1IXmg61hYvSeA2xiksxk974RQ2Viie3kZEUKUF5VaM8phZEhK5iEvfXeTw8kODjFiAHcAH7AbsQGmG3IQDJJY+plH9FeL/iIDhqQAgzNjCfBrDUOVEJZlJsGN2egBNIeC4Jk1K/gCF+EZE2GhyechW2pDAjhSvnA+yPF7OUoeoNKeI3p5kWOo0zQ9igooSvoEMaBPi5FQ6QkRFxAssfMTy+RLMtoEshmJJ1lRC0ZT4tmUUGmamNZE/6pCWzWgpkjKlQ+3hRDAq5UaQB+3QEw1KNipb+5CoXTYfioKQJT6Kwp2PbUXnqCJaF9kAYliLSelp7DqnmBpVBhQT1MTUngEqXHqGVo0TdfWRo4EreaKrpA2H5yWiqE6a821jz1KTh20Q2/DGS0hJugjg6k2daajQi+RTOfTDFEEh2g6JndCWp2TAE23qINpsN6DXqVYhY6amAAXOmU3flh5kgM6TEwjeKsROO+ASIkRFuJaJcBjjfZ5L7W5htGprcsnWp/zMyzgscHZAEZSZTcLsTnrAi9xPwShICR6V8rFsRBrVy6khRfIQEXoOAQkVsJTeNCqtLcjJS/RNgIHiP/q0wPF5jAEoWQPO7XVE7YN63//mlx5wah1KVOc4w3QOX81sarmRnu1YxL11j4y26bRyohCsrLlp0AVJKD/eHLdiUD+8UFxc49NALYu1LRY6W8c5gMUSzkhpUFTU03RELBvdXvz2plLELMgNAAsCoGT9wD/WbUaKbdRwy8Yu3irOaRQt6LsghEeC5oRe7MmSx4y0i8Sa3cqtSHa8J2ZkT0+VmsKWDD9Erw4ubPEmp+RVW4To3e+VDekswIGSa/KBE62ZjLkUpqd5CASsCscVbCX5Cu6YIjOO56Eyj2WIm/eO3PyBrxzSCqTlwFlizDyK4pHWrhQqlxyQiH6+5/fqyr/SuZZUAIzHkYo80tru5grctqcB8x85Xgsv2MozCJbH4JybjW34NoiLZCtGGSo6StouOazw3a85cKJsjoqS+O+5Wp8g3uhmXe+M3w/rMjCjIk8IJOVCjQDucJPWmYsm6uZyccNIBNNnKuYLqAd0YO7D6C7DAMqaFS/JdoqXBEpVHyjj2I2DhzDw/sebNzGboy3lvvG7tFKGSvHdnzHeJzHerzHXwBmfAwcTQJWfzzIhFzIhnzIqCBLhHt+iBwKRgGh09vIkjzJlFzJlnwHpLZvDnzJllBTJkKwiMTJojzKpFzKpsxxPNjAp4wJGcy+w7rKsBzLsjzLtFzLtnzLuJzL/7q8y7zcy778y8AczMI8zMRczMZ8zMiczMq8zMzczM78zNAczdI8zdRczdZ8zdiczdq8zdzczd78zeAczuI8zuRMyZhZzuiczuq8zm18zuz8zvAcz/LMCe48z/Z8z/icz4JQz/rcz/78zwCNBvwc0ARd0AY9z4yiwQ7cymySidu4vwcd0RI90dTsRGNjBJ5sNkN6zhdARRT90SAd0s0sRkMEScXSqpXiTJjpAbghyCL90jAd08DsR/hZmF0XwTKd0zq907GsJEgBo4UZszjN00Rd1Eb9x6NGsMLWRtrrcVGyyUcd1VI91cYgdaRzdm+Ih3JD1Vzd1V59CvrHZUg4Wv9KdTlffdZondaVoIymyJWjlazkq9ZyPdd0nQckuY2AhpY6imlNXdd+/deAnQaveaT9V9PSCihmG9iKvdiMvQUm0ahLrUzMpNJHZKqNfdmYndk/e7D8ldEEe0/1PGN1rNmkXdqa3Tf929a3dikbbGV1l22mHduyPdu0Xdu2fdu4ndu6vdu83du+/dvAHdzCPdzEXdzGfdzIndzKvdzM3dzO/dzQHd3SPd3UXd3Wfd3Ynd3avd3c3d3e/d3gHd7iPd7kXd7mfd7ond7qvd7s3d7
u/d7wHd/yPd/0Xd/2fd/4nd/6vd/83d/+/d8AHuACPuAEXuAGfuAInuAKvuAM3uD/Dv7gEB7hEj7hFF7hFn7hGJ7hGr7hHN7hHv7hIB7iIj7iJF7iJn7iKJ7iKr7iLN7iLv7iMB7jMj7jNF7jNn7jFP12Z4PjPM7cOl7FPR7kvf3jUCzkRu7bRH7kSh7cSb7kTi7MYtYIRPx/ablsOv7kWO7LrcTIiTDlGZavRfbjWT7mujwRkawOQG5xVdDkZN7ms0zHj+DlX77mYu7mdg7LZbkIcl6HYX7ld/7npezHkZXKduKLiYsy6IvoXDMvB6Nvqb1ja2Qwm4yXff52gH7pnPx70XsECzS1XF66E8SVP3NTrDY/wms2tnI5FEuVdx0FbI7psN7Izyid1gID0OKo/0iRQ5elUlgEmy11GS8VTUzsRK19SKLOUC5qv31V57He7IYME1YLKOIqcxCpBME7a11Bmmknf1A3gSZQfGnl584+7n+8e+PocvwMxte4xhh9t+bjVWNdJXGMd5ZO7vaux8H6kmj0kYKRaZoGRz6nPbWbW8MxlW6oext3tfV+7wwvxyEqJD1oIp0lfgVEuOXX0hzc7f4YnzjarwovMw0f8m78mf8hxSbYmyS2TblqwO04qEX8kqT+8U4s8jT/G9WuDAKHoAsqmF+Tunxmnj2s7JDL58su7jV/9PVBfwc20FW4mRn/uvzIs+dOQcyO9FaPCvGeVmOU7BIK1OVruFAL28YcP/TtbnMwaPRXn/ajIOfemgKKouttEjg/+q7EOqUQnPGEVOwzkOociriuXvVqH/igYO4lW7k+c9HyCrEV+6pajerY+/Irt69WvvCCX/mdEHuJHsCD7jC/ylTLyybEuMNC0ij9R8RkP0CqRQWvbvmsz8qC7gliAtGCifatX/uU0CSlmQnNcIcyj8C2//uTYfaZsDdiX/SUD/zI7wgf4AJXCgkIEO1nf/zJP/3cvPrUf/3YbP3Yv/3TrP3c//3O7P3gP/5uEAQAIfkEBQYAAwAsjgDMAR0AIAAABoRAgHBIJAaOyKSyyBwqn8lmE0qVMqlQaxH71Bq5SRA4IB4fd+OyeZNpZyxHi7ttPgqIlPjdWS/mAxRbfXh6X2uEgHtCdQF+hXyHQ3+BhmOOiZVgl5SQloici4OSj6GRQpOKAIybqaufraKnpKqxAKiZXF6dmrq0plquusFew8C1VsXIdUEAIfkEBQ0AJgAsnADMAR0AIAAABqFAgHBILAaOyGSyyCQqn8imFAqVNqlPKxOr1Bq5Ue8QHBYDyEezEB1Qn9HFkYZRkCAEY7jQUniC8G9kQhEDAQcXQh8JASR4bEIEhkUfARiOeg0BFlsBiI8XRxIco6R0FYFgAHxcp48VaVqfsFaPApkdsXoKlRtMiLZ6Hn0Qm4khD2t6qoVPEJeCRCITwyWNydBmtdnKYtrd3F7e4eC52N9kQQAh+QQFDQAEACyrAMwBHQAgAAAGi0CAcEgkBo7IpLLIHCqfyWZzAYVKmVXr1ZhddpQWbDda4Zip4eI4SsQE0tx1oJh4i+V0u1o+b+vja3lwTnyCd4F+g0J8fYSKAFR4iYdjhnuSQ3WPjJaAlZOXiEMSASSUXUUiATEXAp6olwMJEIuFRRcgBUc+tZhbkLa/wL5bkaK/nMLDx8XByM7NckEAIfkEBQ0ABAAsuQDMAR0AIAAABodAgHBILAaOyGSyyCQqn8imFAqVNqlPKxOrFBqSnCs3CjhxON/wdnwspgJqIzvghovZ9bhznr+P+2t4RG96Q3N0g3aBf4mFQoeAcoJDhH5ckXuTQpWLl42WWEUVbQKlhnxyIqanmptIGAMdj6huGgtHCLOtWpBamYy+usDBvcEAxcS0ybtWh0EAIfkEBQ0AAgAsyADMAR0AIAAAAkWEjxjL7QmPmy1Gii3EVCc+eQpYiQZZmihjnmvQAi/czrFdv7eer7vfQ/2EQdLQWAQdlUnO0tnMRDvTUPVxTYlwKl53VQA
AIfkEBQYAAwAs1gDMAR0AIAAABqFAgHBILC4CyKQyWWwSl1Cm0xmNTqnV5bWZ1W6fXelXeAwHxkMzEk1WswHlMMAR6HAYjwtlgve0zXNLEzEFRyF/cnSHF4YmHg0LG3BudAhCfCiXAReTgHQVmh9Cn52JARajpwKBoHFdrKmgsGpnpAI7qrOUubYHqrSwwb6iwL28v7uowsiex7LDAsXOsaWvbK5Zb8DXbtyA3nLg1mjYVdpqQQAh+QQFBgAEACzlAMwBHAAgAAAGo0CAcEgkBo7IZLLIHCqfyGYTCpUyqU9rEavUGrlRrxAcFpOP4vE5DTgHiKRbgaERbZxrQGTy1ETUZEISARMWAgIUDwEOgGACLYR3QxEMARdtayUBdkUcASmYgQOEHKWmGJuhjmAjqlwmR4ZWbpgtWrQ8AQq3ax1Hl0Ueh7QCgwM0khYKDHfEe1TNeUIpnnQSIsDEZtJe2t3cvIFp3uGO4+CzZ0EAIfkEBQ0AAgAs8wDMAR0AIAAAAkWEjxjL7QmPmy1Gii3EVCc+eQpYiQZZmihjnmvQAi/czrFdv7eer7vfQ/2EQdLQWAQdlUnO0tnMRDvTUPVxTYlwKl53VQAAIfkEBQYABAAsAQHMAR0AIAAABp5AgHBILAaOyGSyyCQqn8imFAqVNqlPKxOr1Bq5Ue8QHBYDyEezEB1Qn9FECwizGEx0lDFc2Kn7jyB6ZHoKGUIRKTKCYIdpVmyNARePewAaAQYwk1uVFHRIIB9OlXwzl0cii1xSFw5HEWuknBWxg5S0b7YWDymwABEnjrmMABZJA0gtqlhDNCenMSd5y1RukGrXZtli217dWt+UttpoQQAh+QQFEwAGACwQAcwBHQAgAAAGokCAcEgkBo7IpLLIHCqfyWYTSpUyqVBrEfvUGrlRrxAcFpOR4vE5kAas2UPXpNGYvEzO9XCufGzUZ0IzRxUCAhUFAQSAZAARCwEtRB1HGW56lAV4X5JvADIBHIZEDwEil4EEoUwOiqiNqiCsrp6gErOnnpmjeZ16j5GTla9ggoSGiK7EXHtPEH/LWEQvfHbQ0VVpntp63IHejeDF4szk0uZUQQAh+QQFBgAHACweAcwBHQAgAAAGoUCAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6vUGrlR7xAcFgPIR7MQHVCf0QBbQEZgxFoZR8NQWcPlBQyCDBwLDWlvZHEBGBkbBogTAX1siydCLAEgmAEoiWCLI5wsQiRzApVyIqWnoZ9crrEpp6mtK7a0f7iirrUurDLAr1iyu8NUxbyzOsdVqsK9ur8CpsHRimqV2XDb2Gba39zh3mLg5WhBACH5BAUNABQALC0BzAEdACAAAAahQIBwSCQGjsikssgcKp/JZhNKlTKpUGsR+9QauVGvEBwWk5Hi8TmQBqzZw48jsYgpLs718AS1qM9CMgEYKhsAEXwYhm+HDAs4RRMBKW56NgESTC4BB5WAIIMcoqMGmJ5kABxcD6dgqZNajKAEsZamVoweC7BMEa1cgUcuGUIXIgMVv1h7VMmMcSAYjhA1fspVac9metnc24Dd4N+o4eTjYEEAIfkEBQYABAAsOwHMAR0AIAAABo1AgHBILAaOyGSyyCQqn8imFAqVNqlPKxOr1Bq5UeLLFbmCj1+c+ZwWbM+B9pv94pYB8HiLw18EJnx8GUJ5cl9sTgFqc2CGiYhDBYprjYmLh5WRk4xcjpqQhJuYnUQxAS2UpEMgfxucWEUWfgwaDxODeHBMFRwDs3eFXp+ZwsHCuaBexsW6x8jEys3HeUEAIfkEBQYAAwAsSgHMAR0AIAAABpxAgHBIJAaOyKSyyBwqn8lmE0qVMqlQaxH7BFCOWu7yywiLkQKy+Rzwgq1ssBoed5dbkkbhEHHWyStKBhtCcW1kDSgmGykLASoCAIZpRyxEBwEnhX9HFEQoARKbbHaEQy2ho2elRKgaqmKsp6mSnGWttJNzs6+1pLtCrrBcssG5tkXCvqvAAMq
TWn6k0cNY1NVV19DU29Hda6vacUEAIfkEBQ0AAwAsWAHMAR0AIAAABopAgHBILAaOyGSyyCQqn8imFAqVNqlPKxOrZCoCsi03WjyBBcbx0WsgidXaNDw+VK+Fi2RYPh6GOBwJZ3xcXoNOdoZ7iHNEX4t1iWWHkY1DM5R4ko6ZAHYBimiMfZOQmpZCmKaem5edn6mAgQExsgqnpBhYE7iFdL1Yv8BUwqyoWrDCyb/LdM1xn0EAIfkEBUIAAwAsZwHMARwAIAAABpdAgHBIJAaOyGSyyBwqn8hmEwqVMqlPaxGr1Bq5Ua8QHBaTj+LxOQ04BwQUdEvSKBwiznW8sFIaNmpkAHsNKCYbKQsBKgJtekcsRAcBJ4FgcEcURCgBEpZcg2hELZ2fWKEMRaQaplSoqqWOgnuwrLKXtKOxbq+6try5Q6utVcFCw7egxgDIvGnOZmvP0tGC09bVl9fa2VxBACH5BAUUAAcALHUBzAEdACAAAAa5QIBwSCQGjsikssgcKp/JZhNKlTKpUGsR+9QauVGvEBwWk5Hi8TmQBqzZwshswIC0Lkf1OU5PjvJua0ISARoUQgoLgG8AFkeHQxOLgjKFRTaTezsBNUUVmWQAhCKeoGCiAS6lC3qhm51En6yBe5USl6ZcjY9ED7lYg4UXiIqzjAB4RzEMqb9VcSd0E3fOWVKfGq2nUn8E2rqNL8NCKsyQxaEALcoGBUc2ToLII77MDhVfe2nHZvL9Z0EAIfkEBVYAAgAsgwHMAR0AIAAAAkWEjxnL3QmRmyxaSm3EU0PueApYiQdZmgC6qAYbuCsrwzXt2jmu6j1v8gWBImGR6DEmkRplk3mBbqQf6gh1w+60P+4QVQAAIfkEBQ0ABwAskgHMAR0AIAAABp5AgHBIJAaOyKSyyBwqn8lmcwGFSplV69WYXUovR2HX2wQvxGPk1YxOB9ZhgDtejs/fQs8Jw5jQ2HJzeQNKBHaCAA8BExdCJ2qBbgAWR41DGoeSMotFL5lpiQEsRRWfY6EipKZdqKpnkaAOoq5toJsSnatZk5VEBrpVQoqMjpB3g4XAWgB6CX0tgMdbTojTsKfW16zZ0tbd099b4Vd3QQAh+QQFBgADACygAcwBHQAgAAAGgECAcEgsBo7IZLLIJCqfyKYUCpU2F1SllZnVFlWBG7cb/YbH5ADzYzigyVtjWh0fzumAUFLy7g5HHBwGZ3Jpa4ROc4dihXBmjImGj31Zi5RUlo1+k5qVnJGORGCQdoqfpZIugYJHqxwbAHdCJ5uydUK2t7l1u3G9W79WwVOmunNBACH5BAU7AAkALK8BzAEdACAAAAagQIBwSCQGjsikssgcKp/JZhNKlTKpUGsR+9QauVGvEBwWk5Hi8TmQBqzZQg/BwGAYTh+B+hwfIDELR05rQiABDxdDHweDfACAeXpXhI8BCFZvhUcuFJJfjhkPST0kJpKZiiyiRxynlEwyRy17ZFYOASO0YLa4ulxCMZySL0cWvli0fpuNtQAVIw+AAzyRx1VpqGavXtnc21rd4N+Y41JvQQAh+QQFBgADACy9AcwBHQAgAAAGlkCAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6sUOgI2LXcJ+Iat42gZLE4HvGy0G36eztfhmo+hSTnvZjFKKkNub2t8FwAXNwExhYBHEUMXkkKGcCBGARWXkTJFGpwCAJh4oaOln6gtnml0rK5jsESiraqvp7WppmaxuLO6Q7ayXLTDvHdakLnLxVjOzMHRptTKztXY18uGQQAh+QQFDQAHACzMAcwBHAAgAAAGokCAcEgkBo7IZLLIHCqfyGYTCpUyqU9rEavUGrlRrxAcFpOP4vE5DTgHhhFCjjHgtL5kIaXwJDjXJgYBExdCL0d3bWstRx5ECoNqeZAORXsBEYp5NwEiVwEWmmAAgiifd26knaeiXKqeW6CtWACUlo2zVACMC45DkBOSo4GDhSY2iMKuAJdKCn95QnExCwV2eMNmgNrR3Nl
eqWnh3svktOJnQQAh+QQFBgADACzaAcwBHQAgAAAGk0CAcEgkBo7IpLLIHCqfyWYTSpUyqVBrEfvUGrlRrxAcFpOR4vE5kAas2QBHwJap+RialHMtlItiSipqZ31HEhcAFzcBMYNkhQwRQxdHiG+FIFsBFW58cXNFGpsCl590RKItnYSmoaOlfq6qsKCor56xtrO4tUOpq4+tusBghSSyxFxtpWaezazPwdHFacxe1lpvQQAh+QQFBgAmACzoAcwBHQAgAAAGoUCAcEgsBo7IZLLIJCqfyKYUCpU2qU8rE6vUGrlR7xAcFgPIR7MQHVCf0cWRhlGQIARjuNBSeILwb2RCEQMBBxdCHwkBJHhsQgSGRR8BGI56DQEWWwGIjxdHEhyjpHQVgWAAfFynjxVpWp+wVo8CmR2xegqVG0yItnoefRCbiSEPa3qqhU8Ql4JEIhPDJY3J0Ga12cpi2t3cXt7h4LnY32RBACH5BAUNAAYALPcBzAEdACAAAAaoQIBwSCQGjsikssgcKp/JZhNKlTKpUGsR+9QauVGvEBwWk5Hi8TmQBqzZQ9ekwYg5Uib1eTg/YhhHF3pkQiiBQxYKgm5rQgcBI1ZvjgEHko0ALUcEFRtXmAIKSRoyGU6gABQiIYABGBGDYFIqBZWxXFYvAQ+3WLm7vVUAIDUWAkIftS7BWcNIf0c3nox7qXIxCw0PJMfMXWmT4Kha4Wbjl9Xm6V7l7GtBACH5BAVJAAgALAUCzAEdACAAAAaNQIBwSCwGjshkssgkKp/IphQKlTapTysTq9QauVEizWW5go9FUOBlPqfXgu058GbL3RRuRzinXziAAwExgIAVfHN1bWCKd4xEO3COXI1fbpCSlo9DkXaalJieTomhi6CcAQSmWEUoaJOslgYPEIcAfUwXBxgLR3u3pF6Il8LAxMK4xcabyMHNx17JxX1BACH5BAUGAAQALAwAzAEWAkAAAAb/QIBwSCwaj8ikcslsOp/QqHRKrVqv2Kx2y+16v2AjYUwum8/otHrNbrvf8Lh8Tq/b7/i8fs/v+/+AeWGDhIWGh4iJiouMjY6PkJGSk5SVlpeYmU6BnJ2en6ChoqOkpaane5uoq6ytrq+wsbKzgk20t7i5uru8vaQALSJpqr7FxsfIycqrO2rEy9DR0tPU1FXV2Nna29yhAGTX3eLj5OXmbc/n6uvs7bQ9a+nu8/T19n0CAM3Dtvf+/wADDgPhrJ/AgwgT/pOnsKHDh8oE7EPDEKLFixhlEeTHJKPHjyBnVQxJsqRJOhLH5JtysqXLl/HYjIRJs2bIGiul2NzJ8+OK/wADKBrsSbToQwgPwA01yrSpvY1mZjqdSnWd1KpYs267qrWr12Vcv4oduyss2bNoX5lNy7btKE1w48qdS7euXbtu8+pd5QSo37+A7woeTLiw4cONACv2i7ix48eQIzdevFiy5cuYM2s2RFnx5s+gQ4seDaBzYNKoU6teHdf0X9awY8ueTcg1Y9q4c+vencQ2UN7Agwtf7TvA8OPIk0surry58+d0mUOfTr26I+nWs2vfzgU79+/gwy/xLr68+e/kz6tf/zw9+/fwgbuPT78+7Pn28+sHjX+///+Q9QfggAQKJmCBCCaYyYEKNujgIww+KOGEhURI4YUYamFhhhx2CCfFhh6GKKIRII5oYoglnqgihimu6OKDLb4oI4IxzmjjfzXeqKN9QQAAIfkEBVMBPwAsDADsATYDYAAABv9AgHBILBqPyKRyyWw6n9CodEqtWq/YrHbL7Xq/4LB4TC6bz+i0es1uu9/wuHxOr9vv+Lx+z+/7/4CBgoOEhYaHiImKi4yNjo+QkZKTlJWWl5iZmm4vCxJoDgE2m1oUoQGoSAqihSoBD1aho6RUIay0uE8fMbmBKK9fDKg0RRioJFGdn2eyvVQRBagYMQaqt0g4Ih14rjdKES4uSc1qFi7bgrYizn3Z6Gi7SRcwJwbHSSIawiAU2DsDDDSIs2LhlEABRyI
QMBaDQAQkKiQ0KCCBGDdPwVCBIGIB1TUnykB9ZAfSU4YlMgwgw4fxzq4DSigsYDBupBkSwNLZJFkHZ4j/chiS6PO4cwMEosOMdJhJdNmUpUxRwSpyASDSC0YIIF3A4uJPL8LsbRjCIkCMoiydliHH08kJtFBWtBwkkyYStmh8DlLXNo/eMy9uhNWgoEKxCTpaAJxVRGuBDwIUpiLiYd/DXQG6SqnM9fJEHcXM9qOgbwLCIYpvuZgJ2c6vr12EjQjQYgiGBxzgHgnJTHffISB8N/lLd/IRvIBzBioh/LcauTzODJ3AIIQxB0JtQvt4dgSRsjlMDPllNwp48ULInxSSGquQuoZtZ/6unM7rjB03AuhoI/dKARWwQN0CDfVTBG/vJWDTCNOlcFoUzSwUEAJUGcdefULkE9A6AFlg/wQJD0Rz0BSrHSDZhEjMhBWDAZnIEQgM8VPhVh4VEdVOwtA4XxEd5EagBC889ARnY6UYgJC2eKQWcDqaRUSEDDlohEJnUVRbEcjpcyUA0eioABQzoCXDjhkOiFiRBwawUQW5WcmkDESk1hoYplTpgIc8LgRQgUauaA8GLgJgAzCBbYgmmCNpOURHQWmo0qFP5EjjlweaGSQVq/xUAS8AvpAdY+NZKGh9xgw0hDAqSFFqaKkK8RZsQuRGKQBVBYDneypuhoJ1+4AqRUHRhEDBfXEeMN0IQgqxnYFE/FKNMHWOpcOR/uFKY6uhOlWrZot2Kal+EIpiT1TY7ieqYhqEFv9VPbZSNmBUGiT7RIlVMuWdEay9m9SFW71DKwcA7wkwwFgGvJMDBlM0sK+OBTRRc/IxOyNZBk9VKcBSDXzCk+IiVa65WxGAZaILbBlCwgNz4OsSKWDIMbb6zmRxs2oigJRpsV7zAapikOcxR0R5+7EQ+SIV36AIIzXDFFnS1u0AwX0LaRMI9/Bnyr7GPFcUCnoYDxOrjsxBER0dae2tOYsMBWdoAyCrfKZmOJcr5Z3qdLhbQbBeFKkRlSnNN4rG8b2Dc2krDU7f5vYtpnSQrAiouCf3Mh509yCtAAWK2cpMGITVBSFyai2P9Y3ZQGsts8YxBHiS5jITq0Uud7s2qln/QJAeFBTftied6MFxvoUJZ1ry7EurNuFK+aGay3tTfN0jV/d56KFN4F4nJRdOhKJpRkdFXdm5t8pjCGFWQ/PVVRDBsOAytw7I/npRwTHrYc8sDiE4TlautfOV+wlHk0Z8HPO75LGqW/uaH1eqAJ0kjC8+5ptCrjbVhLBpr3WowKDZ1ueq18VEdhwEwN+Idjc8QQU9Y0oXrdxjj7i5RQY4QI8F9DGrIQWrd0epDwkAWKQL/EN0AEjN5Wr1EGhtQHqsqFZN/jOqT2ynhh3EDtCCgrdkERGBpFMhCVFAH9oF0UmUMWJcjCdClzElhkc4z/YWaATkAG94EPFgE7HQCSgy/2ll3fgg9DjGACvKjl8FjKL2ItY8K0RDXkOoFUK2468TpmkB4GojKxQ4tC7og4llhIkS+FK7BUgMcFsSgIKU94Smbals3BrVANAjBeLMyCKAVNUkgfgpI+SAFaY7SQJTsZ11QE6LTnBkNHwJxmUt7hMKXM/fFGgYiajtCoy63HAI9CDOwKpPROgQzb5ixOBo5XdKRALkntnEJ0pTlJ7E1wZLybxsehF8cVJOMoHmRTfagpTTLMDldrbHmSwNCbf0FYIGuUQlNDCOtJxCNDFHxcWhcZt6rEkkDRefTM4oWaYsZBVy0xrIeeqLX3GkO0M5qnVKEoY8EwM8p4gSMtntn/+7EZU3pZDRs9WubfPyoEjtRtJ8OmAh51Tnyka5uMTdLXUg+8RqGloSmihQqcWUHVNMUDbxEWom63hbFvjHzvcx6ZrAC2UN2CgEe6DDiFDBWTg3UKgbaXJyQ0Ek5l6RMoNV1C0km9NKL6RFnACThLcSWF0VBMeS9OCitespT0m3AFbesXNwOWhCZNYCudqwsfAj4dTy+LyCjnQIIfLqYnO
Wtex1zwpjeh91lpHayZ00TX8Vm4jIQJ66YsxsFPOWRtSpWNfSJ5V4Le0pZ+JYilJBsqcdmWhz6jecunOok6SIeFSnypxBphMJZRlxjTKA60a1XWWBU1UFWRUDYMUWdmz/QkHGRRTJOSGl+7vmBXSgNVKudKVilA1pEzQTahjsrSVVklKaREaq5RWL8eyrGb1IYIjJbaJH9CJgkWBMeu7tsUsQnkG3NmCPAOqTTqBOP8DjnY7MLFsRvcuBy2qTaJhsxRCtws64eZax2GKA7SToHGtCFM5xwXRN6uEtzYIbDpwFwBOOoxT3B1wDCxfBOs4pWAWp3Gf0p8e1DM1yBbBLzNInttoNgDR/qawMMlaZLnNmK5U0sD9GSsI7ttaPDAaDA1o0yStgQQGVGArzxnh2AJwJhSyshZqCLIvxVeet3HyFEim6du4No0kP/doMR5bDU9qhggrchLFycQIH2IHe/+TS5DgfYa9V/iyLuWjnxWGSe3+mAjUVsxTIHJLKOnY0j1OQOhD/WI5M8jOKIU2839IUxguNsmGtgeTFlVoXMZgvdW1pE8GQc64aROQIdcHoMo5ti2f27XOjUBcXdpsJZo515uS11h1rM8mpnjaud4y9QFI6C4ZGNQyU49dHw7vRGGIbsY3UtnmmWgkaLrZ6S7NmBUADAb/IwFsUy1lsbPeNrba2v13d6mFXocY1CMoAkKWcFFba4xg/ZlC14Cx0w1krEOaqRpl87I+O9t7ywaeUmX3yp3DqVUZaWZi+jUAhCVzZemybmlcN21D18247l3SkXU7SMcHG4Dm/6PyAKP/GKl9xjZFE0FGWNJH4bRTGXxc31kF2Kx64FLXnurjdpp51mlWj59YgXEyn3FlyiyadtNEH3St+arkffL/zBhlG3y5vcWMq8F+qh2JgulNwx7rSnCks2ShAdyfg4NySLq49Yj7py4e35olF4CcbH/UOS9PFBOHUvvV2BMuF2+njzj1VOH/hm1v+zl9NZLqtZW8n376rYkP5XFn9srcUtuvaY3Rqwj6XbX15ap3fJIwjfNc96b6MXoQKiIufT7EuOPtqhFvKEX5pMCehbMWtICTv3hDDKx+x60f8F/t4/UQ7vaeE9xS2QwyuECZ1VmZQt3+9p2sq5lV042vbkV1PMHr/CYGAEjN96pR9SNMYb5dPoIRA54N7x+VBjAQ4vcc3nDIbS9ZxCDhoLKZ36VdSJwh+ZldGQIRUqMFVtdJ9LnUAwMZza1R6YcZK1pSDTEWDAxZX7uRrSrRaBUQ3OWZq9SZ8gsMeJ4N6axRKEqEB/XAiwGQ6EJQjaBM6p7QKUxNmCYAn9pOBz7MxOcN/eJd/RgAf4rQDerVaTIMKmjE+EMZXSwAQeqdjsGYC9kB7bEVGqDQWfdNTdFgFdbFBAtRFYUhW90dQbnd3M4KJAqgm7gEOI3JMT/gwpKeBGHJ6KMhGi4hApqVAmrc2nNZB5AMg0ZBeYOMCnpFZlKFLO3JhjgEZ/74jaS4SQWn0ikUoi29XGqPBcPG0AJcSO3PCbSV0CkL4DbOIQzJDhWqTAUDGfO6EIdCHYWXTFFZ3edSjipLyg3I4iArUYxgiGNJgOxIWATkUOGe4YbziLaUmcx12I5XEcccxMAoSAxoDPET2AHGDPWbxMAOQfRt2VPdAMSgzMFMDOXMWSYY2V24lSVGREgnIJAXpQu+lHENBbTczkZCkfaJlTjNCdCMYODoTOAYZhSREiitYRs+2BBLBjlDHKFHzMM5zdh+pLpJiiFwjDYXoT5czLTpyYfW1j0xxYpIYRzcClbUSNFPXMPZSC/2ij02AOEqjQ4EjIMw4jCPxjfqHP/8iQkzUtyTW9x1m4gAkYFmW9mRxchRQ4wHokhV/Ig7vVpcCA0MN54X9yJVkAyP+5Gv6J5R5owrsVUPg8EOG8j0WMnzgt5S78S6aeJHHYyewxIEoMohumSO0iJNkMi0smVuROXNy+ICMdVfPICENQBgkhZaGwoBzp3AP9ni
fuZPUxJE50Iq60ph6ORFnYmU+QllZEWSVYhn+sCcg6U4/eWoGYXNUAZsOARGhUxGodTUeQphMACzSQyyokZ2pwknH5xzY9AW2iZ7sqQhA157mkgCK8DV0sIPwSSLo+Bs7o4ldsJ73+Z+BYAyu6RzJdgj0OQc2A5UA+ocd6Rx6hkDc2J//HbigFJoH5fWfBWoIByoHE1ehnQZG8KkkVtGHW+CfHnqiKPoGGVoI85Cif7ApOdJbfeEC96idPTOhLpqjOqpS9rejPtphGiCjPzqkRFqkRnqkSJqkSrqkTNqkTvqkUBqlUjqlVFqlVnqlWJqlWrqlXNqlXvqlYBqmYjqmZFqmZnqmaJqmarqmbNqmbvqmcBqncjqndFqndnqneJqnerqnfNqnfvqngBqogjqohFqohnqoiJqoirqojNqojvqokBqpkjqplFqplnqpmJqpmrqpnNqpnvqpoBqqojqqpFqqpnqqqJqqqrqqrNqqrvqqsBqrsjqrtFqrtnqruJqrurqr7Lzaq776q8AarMI6rMRarMZ6rMiarMq6rMzarM76rNAardI6rY/QYDVCrdgKrdYqKtnarci6rWLmreL6rds6ruZarOB6ruoarOm6ru7Kq+36rvJ6q/E6r/Yqq/V6r/raqvm6r/6Kqv36rwI7qgE7sAbrqQV7sAqbqQm7sA5LqQ37sBL7qBE7sRarqBV7sRpbqBm7sR4LqB37sSK7pyE7siZrpyV7siobpym7si7Lpi37sjJ7pjE7szYrpjV7szrbpTm7sz6LpT37s0I7pUE7tEbrpEV7tEqbpEm7tE5LpE37tFKro1E7tVZ7okEAACH5BAVkAAIALAwALAIOAEAAAAIqhI+py+0Po5y02ouz3rz7D4ZRQJbmiabqyrbuC8fyTNf2jef6zvf+/yoAADs=\n" }, "metadata": { "image/png": { "width": 600 } }, "execution_count": 26 } ] }, { "cell_type": "markdown", "source": [ "# Reindex\n", "\n", "Now that content is stored, embedding indexes can be rebuilt with different configuration settings." 
], "metadata": { "id": "PR3TzkzrjJ6x" } }, { "cell_type": "code", "source": [ "# Print index info before (info() is also new!)\n", "embeddings.info()\n", "\n", "# Reindex\n", "embeddings.reindex({\"path\": \"sentence-transformers/paraphrase-MiniLM-L3-v2\"})\n", "\n", "print(\"------\")\n", "\n", "# Print index info after\n", "embeddings.info()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fAz7Jv6SjR6a", "outputId": "d9ffddea-bb78-4664-b0a4-ea8eccdba8f9" }, "execution_count": 27, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{\n", " \"backend\": \"faiss\",\n", " \"build\": {\n", " \"create\": \"2022-05-10T20:32:24Z\",\n", " \"python\": \"3.7.13\",\n", " \"settings\": {\n", " \"components\": \"IDMap,Flat\"\n", " },\n", " \"system\": \"Linux (x86_64)\",\n", " \"txtai\": \"4.5.0\"\n", " },\n", " \"content\": \"sqlite\",\n", " \"dimensions\": 768,\n", " \"objects\": true,\n", " \"offset\": 7,\n", " \"path\": \"sentence-transformers/nli-mpnet-base-v2\",\n", " \"update\": \"2022-05-10T20:32:25Z\"\n", "}\n", "------\n", "{\n", " \"backend\": \"faiss\",\n", " \"build\": {\n", " \"create\": \"2022-05-10T20:32:26Z\",\n", " \"python\": \"3.7.13\",\n", " \"settings\": {\n", " \"components\": \"IDMap,Flat\"\n", " },\n", " \"system\": \"Linux (x86_64)\",\n", " \"txtai\": \"4.5.0\"\n", " },\n", " \"content\": \"sqlite\",\n", " \"dimensions\": 384,\n", " \"objects\": true,\n", " \"offset\": 7,\n", " \"path\": \"sentence-transformers/paraphrase-MiniLM-L3-v2\",\n", " \"update\": \"2022-05-10T20:32:26Z\"\n", "}\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Index compression\n", "\n", "txtai normally saves index files to a directory. With 4.0, it is now possible to save compressed indexes. Indexes can be compressed to tar.gz, tar.bz2, tar.xz and zip. txtai can load compressed files and treats them as directories.\n", "\n", "Compressed indexes can be used as a backup strategy and/or as the primary storage mechanism." 
], "metadata": { "id": "s9aLt2zF2ZW2" } }, { "cell_type": "code", "source": [ "# Save index as tar.xz\n", "embeddings.save(\"index.tar.xz\")\n", "!tar -tvJf index.tar.xz\n", "!echo\n", "!xz -l index.tar.xz\n", "!echo\n", "\n", "# Reload index\n", "embeddings.load(\"index.tar.xz\")\n", "\n", "# Test search\n", "embeddings.search(\"lucky guy\", 1)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "0oOC8ToG1pyn", "outputId": "7e6dd30f-a1cd-47f1-d298-abacf7b69835" }, "execution_count": 28, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "drwx------ root/root 0 2022-05-10 20:32 ./\n", "-rw-r--r-- root/root 301 2022-05-10 20:32 ./config\n", "-rw-r--r-- root/root 77824 2022-05-10 20:32 ./documents\n", "-rw-r--r-- root/root 10898 2022-05-10 20:32 ./embeddings\n", "\n", "Strms Blocks Compressed Uncompressed Ratio Check Filename\n", " 1 1 45.8 KiB 100.0 KiB 0.458 CRC64 index.tar.xz\n", "\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': '4',\n", " 'score': 0.3691234290599823,\n", " 'text': 'Maine man wins $1M from $25 lottery ticket'}]" ] }, "metadata": {}, "execution_count": 28 } ] }, { "cell_type": "markdown", "source": [ "Note the compression ratio. Depending on the type of data stored, this could be quite substantial (text will compress much better than objects). " ], "metadata": { "id": "x5iSRKZVYfok" } }, { "cell_type": "markdown", "source": [ "# External vector models\n", "\n", "txtai supports generating vectors with [Hugging Face Transformers](https://github.com/huggingface/transformers), [PyTorch](https://github.com/pytorch/pytorch), [ONNX](https://github.com/microsoft/onnxruntime) and [Word Vector](https://github.com/neuml/staticvectors) models.\n", "\n", "This release adds support for pre-computed vectors using external models. External models may be an API, custom library and/or another way to vectorize data. 
This adds flexibility given the high computation cost in building embeddings vectors. Embeddings generation could be outsourced or consolidated to a group of servers with GPUs, leaving index servers to run on lower resourced machines. \n", "\n", "The example below uses the [Hugging Face Inference API](https://huggingface.co/inference-api) to build embeddings vectors. We'll load the [exact model as in the first example](#scrollTo=0p3WCDniUths) and produce the same results." ], "metadata": { "id": "oAISaBYVg60i" } }, { "cell_type": "code", "source": [ "import numpy as np\n", "import requests\n", "\n", "def transform(inputs):\n", " response = requests.post(\"https://api-inference.huggingface.co/pipeline/feature-extraction/sentence-transformers/nli-mpnet-base-v2\",\n", " json={\"inputs\": inputs})\n", "\n", " return np.array(response.json(), dtype=np.float32)\n", "\n", "# Index data using vectors from Inference API\n", "embeddings = Embeddings({\"method\": \"external\", \"transform\": transform, \"content\": True})\n", "embeddings.index([(uid, text, None) for uid, text in enumerate(data)])\n", "\n", "print(\"%-20s %s\" % (\"Query\", \"Best Match\"))\n", "print(\"-\" * 50)\n", "\n", "# Run an embeddings search for each query\n", "for query in (\"feel good story\", \"climate change\", \"public health story\", \"war\", \"wildlife\", \"asia\", \"lucky\", \"dishonest junk\"):\n", " # Extract text field from result\n", " text = embeddings.search(f\"select id, text, score from txtai where similar('{query}')\", 1)[0][\"text\"]\n", "\n", " # Print text\n", " print(\"%-20s %s\" % (query, text))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "d6dbB3nMk29O", "outputId": "fab5be50-4714-4935-8d1f-0955960d1cec" }, "execution_count": 29, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Query Best Match\n", "--------------------------------------------------\n", "feel good story Maine man wins $1M from $25 lottery ticket\n", "climate 
change Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\n", "public health story US tops 5 million confirmed virus cases\n", "war Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n", "wildlife The National Park Service warns against sacrificing slower friends in a bear attack\n", "asia Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n", "lucky Maine man wins $1M from $25 lottery ticket\n", "dishonest junk Make huge profits without work, earn up to $100,000 a day\n" ] } ] }, { "cell_type": "markdown", "source": [ "The next example uses [spaCy](https://github.com/explosion/spaCy) to build vectors and then loads them into txtai. The vectors with this model are much faster to generate at the expense of accuracy." ], "metadata": { "id": "_kq_fbjd4g74" } }, { "cell_type": "code", "source": [ "%%capture\n", "!pip install spacy --upgrade\n", "!python -m spacy download en_core_web_md" ], "metadata": { "id": "IIGlel3ShyGP" }, "execution_count": 30, "outputs": [] }, { "cell_type": "code", "source": [ "import spacy\n", "\n", "# Load spacy\n", "nlp = spacy.load(\"en_core_web_md\")\n", "\n", "def transform(inputs):\n", " return [result.vector for result in nlp.pipe(inputs)]\n", "\n", "# Index data with spacy pipeline\n", "embeddings = Embeddings({\"method\": \"external\", \"transform\": transform, \"content\": True})\n", "embeddings.index([(uid, text, None) for uid, text in enumerate(data)])\n", "\n", "# Run search\n", "print(embeddings.search(\"select id, text, score from txtai where similar('nature')\", 1))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nikKybxEkhjC", "outputId": "15dcd1cf-7068-4365-e6fe-8af29ff24b9d" }, "execution_count": 31, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[{'id': '3', 'text': 'The National Park Service warns against sacrificing slower friends in a bear attack', 'score': 0.44850602746009827}]\n" ] } 
] }, { "cell_type": "markdown", "metadata": { "id": "aDIF3tYt6X0O" }, "source": [ "# Wrapping up\n", "\n", "This notebook gave a quick overview of txtai. txtai 4.0 is now out!\n", "\n", "See the following links for more information.\n", "\n", "- [4.0 Release on GitHub](https://github.com/neuml/txtai/releases/tag/v4.0.0)\n", "- [Documentation site](https://neuml.github.io/txtai)\n", "- [Full list of examples](https://neuml.github.io/txtai/examples/)" ] } ] } ================================================ FILE: examples/25_Generate_image_captions_and_detect_objects.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Generate image captions and detect objects\n", "\n", "txtai as the name implies works with text and ai, pretty straightforward. But that doesn't mean it can't work with different types of content. For example, an image can be described with words. We can use that description to compare an image to a query or other documents. This notebook shows how images and text can be embedded into the same space to generate image captions and detect objects." ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. Since this notebook is using optional pipelines, we need to install the pipeline extras package." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "XMQuuun2R06J" }, "outputs": [], "source": [ "%%capture\n", "!pip install ipyplot git+https://github.com/neuml/txtai#egg=txtai[pipeline]\n", "\n", "# Get test data\n", "!wget -N https://github.com/neuml/txtai/releases/download/v3.5.0/tests.tar.gz\n", "!tar -xvzf tests.tar.gz" ] }, { "cell_type": "markdown", "metadata": { "id": "PNPJ95cdTKSS" }, "source": [ "# Create a captions instance\n", "\n", "The captions pipeline takes an image or list of images and generates captions. 
This pipeline works using a combination of an image encoder model and a text model. " ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "nTDwXOUeTH2-" }, "outputs": [], "source": [ "%%capture\n", "\n", "from txtai.pipeline import Caption\n", "\n", "# Create caption pipeline\n", "caption = Caption()" ] }, { "cell_type": "markdown", "metadata": { "id": "-vGR_piwZZO6" }, "source": [ "# Generate captions\n", "\n", "The example below shows how to generate captions. A list of images is read from a directory, passed to a caption model and text descriptions are returned." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "-K2YJJzsVtfq", "outputId": "7cfd549a-1db6-47b9-c4ae-623e94ed48d1" }, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "

\n", " \n", " \n", " \n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "
\n", "
\n", "
\n", "

a clock on the side of a wall

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

a painting of flowers on top of a table

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

a computer screen with a picture of a person on it

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

a blurry photo of a sunset with a sky background

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

a blurry photo of a bunch of stuffed animals

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

motorcycles are parked on the side of the road

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

a large tree branch with a person in it

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

a city street at night with traffic lights

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

a book shelf filled with many books

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

a large building with many windows in a city

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", "
" ], "text/plain": [ "" ] }, "metadata": {} } ], "source": [ "import glob\n", "import ipyplot\n", "\n", "from PIL import Image\n", "\n", "# Get list of images\n", "images = glob.glob('txtai/*jpg')\n", "\n", "# Generate captions\n", "captions = caption(images)\n", "\n", "# Show image/caption pairs\n", "ipyplot.plot_images([Image.open(image) for image in images], captions, img_width=425, force_b64=True)" ] }, { "cell_type": "markdown", "metadata": { "id": "dQmxNGkXw-YN" }, "source": [ "Reviewing the captions, they are all generally in the right ballpark but far from perfect. The default model does a decent job but more robust models are necessary to fully deploy an image captioning model. " ] }, { "cell_type": "markdown", "metadata": { "id": "GxjHgXnz1MCD" }, "source": [ "# Create an objects instance\n", "\n", "The objects pipeline takes an image or list of images and generates a list of detected objects. This pipeline works using an object detection model." ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "id": "unoteHQM1l6V" }, "outputs": [], "source": [ "%%capture\n", "\n", "from txtai.pipeline import Objects\n", "\n", "# Create objects pipeline\n", "objects = Objects()" ] }, { "cell_type": "markdown", "metadata": { "id": "HvZE-bww1v1k" }, "source": [ "# Detect objects\n", "\n", "The example below shows how to detect objects. A list of images is read from a directory, passed to an object detection model and detected objects are returned." ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "id": "F_qrDbdv2IVu", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "18642f80-faeb-4fc7-f907-6acc6bdf32fc" }, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", " \n", "
\n", "
\n", "
\n", "

[('clock', 0.9837772846221924)]

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

[('vase', 0.9913519620895386)]

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

[('cell phone', 0.9672072529792786)]

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

[]

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

[]

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

[('motorcycle', 0.9990019202232361), ('person', 0.9853999018669128)]

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

[('bird', 0.9167556762695312)]

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

[]

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

[('book', 0.9250583648681641)]

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

[('umbrella', 0.9032363295555115)]

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", "
" ], "text/plain": [ "" ] }, "metadata": {} } ], "source": [ "import glob\n", "import ipyplot\n", "\n", "from PIL import Image\n", "\n", "# Get list of images\n", "images = glob.glob('txtai/*jpg')\n", "\n", "# Detect objects\n", "detected = objects(images)\n", "\n", "# Show image/objects pairs\n", "ipyplot.plot_images([Image.open(image) for image in images], detected, img_width=425, force_b64=True)" ] }, { "cell_type": "markdown", "metadata": { "id": "dEX-dbXE3U7W" }, "source": [ "Reviewing the detected objects, once again they are all generally in the right ballpark but far from perfect.\n", "\n", "This model or larger models may do well for specific use cases in which the model has high accuracy. For example, the results could be filtered to only accept certain types of objects, which have been shown to have high accuracy." ] }, { "cell_type": "markdown", "metadata": { "id": "HeN8e1uy-icp" }, "source": [ "# Wrapping up\n", "\n", "This notebook introduced image captions and object detection. While the default models for both tasks aren't where we'd like them to be, they provide a good baseline to build on. For certain, targeted use cases where the models excel, they can be used now. This is a fast-evolving area and it is fully expected these models will improve!" 
] } ], "metadata": { "accelerator": "GPU", "colab": { "collapsed_sections": [], "name": "25 - Generate image captions and detect objects", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: examples/26_Entity_extraction_workflows.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "kernelspec": { "name": "python3", "display_name": "Python 3", "language": "python" }, "language_info": { "name": "python", "version": "3.7.6", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "colab": { "name": "26 - Entity extraction workflows", "provenance": [], "collapsed_sections": [] } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "POWZoSJR6XzK" }, "source": [ "# Entity extraction workflows\n", "\n", "Entity extraction is the process of identifying names, locations, organizations and other entity-like tokens in unstructured text. Entity extraction can organize data into topics and/or feed downstream machine learning pipelines.\n", "\n", "This notebook will show how to use the entity extraction pipeline in txtai with workflows." ] }, { "cell_type": "markdown", "metadata": { "id": "qa_PPKVX6XzN" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ] }, { "cell_type": "code", "metadata": { "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "trusted": true, "_kg_hide-output": true, "id": "24q-1n5i6XzQ" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Extract entities\n", "\n", "Let's get right to it! 
The following example creates an entity pipeline and extracts entities from text. \n" ], "metadata": { "id": "0p3WCDniUths" } }, { "cell_type": "code", "metadata": { "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a", "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", "trusted": true, "id": "2j_CFGDR6Xzp", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "6276538e-5b06-4038-ae0f-0aa642e86814" }, "source": [ "from txtai.pipeline import Entity\n", "\n", "data = [\"US tops 5 million confirmed virus cases\",\n", " \"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\",\n", " \"Beijing mobilises invasion craft along coast as Taiwan tensions escalate\",\n", " \"The National Park Service warns against sacrificing slower friends in a bear attack\",\n", " \"Maine man wins $1M from $25 lottery ticket\",\n", " \"Make huge profits without work, earn up to $100,000 a day\"]\n", "\n", "entity = Entity()\n", "\n", "for x, e in enumerate(entity(data)):\n", " print(data[x])\n", " print(f\" {e}\", \"\\n\")" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "US tops 5 million confirmed virus cases\n", " [('US', 'LOC', 0.999273955821991)] \n", "\n", "Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\n", " [('Canada', 'LOC', 0.999609649181366), ('Manhattan', 'MISC', 0.651396632194519)] \n", "\n", "Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n", " [('Beijing', 'LOC', 0.9996659755706787), ('Taiwan', 'LOC', 0.9996755123138428)] \n", "\n", "The National Park Service warns against sacrificing slower friends in a bear attack\n", " [('National Park Service', 'ORG', 0.9993489384651184)] \n", "\n", "Maine man wins $1M from $25 lottery ticket\n", " [('Maine', 'LOC', 0.9987521171569824)] \n", "\n", "Make huge profits without work, earn up to $100,000 a day\n", " [] \n", "\n" ] } ] }, { "cell_type": "markdown", 
"source": [ "The section above is running an entity extraction pipeline for each row in data. The outputs are the token(s) identified as part of an entity, the type of entity and score or confidence in the prediction." ], "metadata": { "id": "bVd_qHh97IlJ" } }, { "cell_type": "markdown", "source": [ "# Feed entities to a workflow\n", "\n", "The next section demonstrates how the entity extraction pipeline can be used as part of a workflow. This workflow uses the output entities and builds an embeddings index for each row. This effectively computes entity embeddings to compare the row similarity with a focus on mentioned entities." ], "metadata": { "id": "fY71Dyyt8Arv" } }, { "cell_type": "code", "source": [ "from txtai.embeddings import Embeddings, Documents\n", "from txtai.workflow import Workflow, Task\n", "\n", "# Create workflow with an entity pipeline output into a documents collection\n", "documents = Documents()\n", "workflow = Workflow([Task(lambda x: entity(x, flatten=True, join=True)), Task(documents.add, unpack=False)])\n", "\n", "# Run workflow\n", "for _ in workflow([(x, row, None) for x, row in enumerate(data)]):\n", " pass\n", "\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/nli-mpnet-base-v2\"})\n", "embeddings.index(documents)\n", "\n", "for query in [\"North America\", \"Asia Pacific\"]:\n", " index = embeddings.search(query, 1)[0][0]\n", " print(query, \"\\t\", data[index])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "yB3e6HVM8dRJ", "outputId": "8424ce5d-9652-421d-d0bd-261b911ea7d6" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "North America \t Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\n", "Asia Pacific \t Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Run workflow YAML\n", "\n", "Below is the same example using 
workflow YAML." ], "metadata": { "id": "u_0WiV1NvbCK" } }, { "cell_type": "code", "source": [ "workflow = \"\"\"\n", "writable: true\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", "\n", "entity:\n", "\n", "workflow:\n", " index:\n", " tasks:\n", " - action: entity\n", " args: [null, \"simple\", true, true]\n", " - action: index\n", "\"\"\"\n", "\n", "from txtai.app import Application\n", "\n", "# Create and run workflow\n", "app = Application(workflow)\n", "for _ in app.workflow(\"index\", [(x, row, None) for x, row in enumerate(data)]):\n", " pass\n", "\n", "# Run queries\n", "for query in [\"North America\", \"Asia Pacific\"]:\n", " index = app.search(query)[0][\"id\"]\n", " print(query, \"\\t\", data[index])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "3qmRdU7LvhQ3", "outputId": "e01453fc-5bbf-41a6-e557-64fa453f80ba" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "North America \t Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\n", "Asia Pacific \t Beijing mobilises invasion craft along coast as Taiwan tensions escalate\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Wrapping up\n", "\n", "This notebook introduced entity extraction pipelines with txtai. This pipeline supports a number of different configurations to help feed downstream systems and/or directly use the entities.\n", "\n", "As with other pipelines, the entity extraction pipeline can be used standalone in Python, as an API service or as part of a workflow!" 
], "metadata": { "id": "i3kkN5Vpx8ks" } } ] } ================================================ FILE: examples/27_Workflow_scheduling.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "27 - Workflow scheduling", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "LjmhJ4ad9kBL" }, "source": [ "# Workflow scheduling\n", "\n", "Workflows are a simple yet powerful construct that takes a callable and returns elements. They are streaming and work on data in batches, allowing large volumes of data to be processed efficiently. When working with streaming data, workflows continually run until the data stream is exhausted. \n", "\n", "Workflows can also be scheduled to run. In this case, a static set of elements dynamically expands each time the workflow runs. Examples include an API service endpoint that returns new items or a polled directory with files coming in and out. \n", "\n", "This notebook will show how to use workflow scheduling in txtai." ] }, { "cell_type": "markdown", "metadata": { "id": "8tLWvo9v-Q0u" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ] }, { "cell_type": "code", "metadata": { "id": "Fa5BCjMFqVKE" }, "source": [ "%%capture\n", "!pip install datasets git+https://github.com/neuml/txtai#egg=txtai[workflow]" ], "execution_count": 6, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Create workflow action\n", "\n", "Workflows run a series of tasks to transform and process data. This section creates a callable object that can be used as a workflow action. The object iterates over a dataset, returning a batch of data." 
], "metadata": { "id": "EJ_hHmQtRgQM" } }, { "cell_type": "code", "metadata": { "id": "3hYRk9JnsM0J" }, "source": [ "from datasets import load_dataset\n", "\n", "class Stream:\n", " def __init__(self):\n", " self.dataset = load_dataset(\"ag_news\", split=\"train\")\n", " self.index, self.size = 0, 2500\n", "\n", " def __call__(self, fields):\n", " outputs = []\n", " for field in fields:\n", " output = []\n", " for row in self.dataset.select(range(self.index, self.index+self.size)):\n", " output.append((self.index, row[field], None))\n", " self.index += 1\n", "\n", " outputs.append(output)\n", "\n", " return outputs" ], "execution_count": 7, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Build workflow\n", "\n", "Next we'll create the workflow. The workflow reads batches of data from a stream and loads it into an Embeddings index. We'll run this workflow four times on a scheduled interval to demonstrate a scheduled workflow." ], "metadata": { "id": "_B4YFu-1R2QC" } }, { "cell_type": "code", "source": [ "from txtai.app import Application\n", "\n", "# Run up to every 5 seconds 4 times\n", "workflow = \"\"\"\n", "writable: true\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", " content: true\n", "\n", "workflow:\n", " index:\n", " schedule:\n", " cron: '* * * * * 0/5'\n", " elements:\n", " - text\n", " iterations: 4\n", " tasks:\n", " - __main__.Stream\n", " - upsert\n", "\"\"\"\n", "\n", "app = Application(workflow)\n", "app.wait()" ], "metadata": { "id": "1oZag3tKWkfe", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "54d6ce91-d782-421a-e092-7e78bb6ee1d0" }, "execution_count": 8, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "2022-02-03 02:12:06,720 [WARNING] _create_builder_config: Using custom data configuration default\n", "2022-02-03 02:12:06,727 [WARNING] download_and_prepare: Reusing dataset ag_news 
(/root/.cache/huggingface/datasets/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548)\n", "2022-02-03 02:12:06,751 [INFO] schedule: 'index' scheduler started with schedule * * * * * 0/5\n", "2022-02-03 02:12:06,757 [INFO] schedule: 'index' next run scheduled for 2022-02-03T02:12:10+00:00\n", "2022-02-03 02:12:34,937 [INFO] schedule: 'index' next run scheduled for 2022-02-03T02:12:35+00:00\n", "2022-02-03 02:12:59,967 [INFO] schedule: 'index' next run scheduled for 2022-02-03T02:13:00+00:00\n", "2022-02-03 02:13:23,349 [INFO] schedule: 'index' next run scheduled for 2022-02-03T02:13:25+00:00\n", "2022-02-03 02:13:49,621 [INFO] schedule: 'index' max iterations (4) reached\n" ] } ] }, { "cell_type": "markdown", "source": [ "Reviewing the log above, we see the `index` job ran four times. Now let's query the index and see what was loaded." ], "metadata": { "id": "SC3Yf9EgSHiK" } }, { "cell_type": "markdown", "source": [ "# Run an embeddings search\n", "\n", "Let's run a search against the newly created index." 
], "metadata": { "id": "5hBY2TsZSQY0" } }, { "cell_type": "code", "source": [ "import json\n", "\n", "# Show total number of records\n", "print(f\"Total records: {app.count()}\")\n", "\n", "# Run a search\n", "print(\"Search:\")\n", "print(json.dumps(app.search(\"life on mars\", limit=1), indent=2))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "3slP9p_KkbtD", "outputId": "9473aaea-eaa3-42e5-f9db-1f9f6e51f5c5" }, "execution_count": 9, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Total records: 10000\n", "Search:\n", "[\n", " {\n", " \"id\": \"119\",\n", " \"text\": \"Life on Mars Likely, Scientist Claims (SPACE.com) SPACE.com - DENVER, COLORADO -- Those twin robots hard at work on Mars have transmitted teasing views that reinforce the prospect that microbial life may exist on the red planet.\",\n", " \"score\": 0.7236138582229614\n", " }\n", "]\n" ] } ] }, { "cell_type": "markdown", "source": [ "The index has 10,000 records. We also see the top result for the query on `life on mars`." ], "metadata": { "id": "4i3H19ZmUSlK" } }, { "cell_type": "markdown", "source": [ "# Run a scheduled embeddings search\n", "\n", "Now let's incrementally load the dataset with a scheduled workflow and run a scheduled search after each batch is loaded." 
], "metadata": { "id": "-t-D7yu_WZl-" } }, { "cell_type": "code", "source": [ "from txtai.app import Application\n", "\n", "# Run every 5 seconds up to 4 times\n", "workflow = \"\"\"\n", "writable: true\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", " content: true\n", "\n", "workflow:\n", " index:\n", " schedule:\n", " cron: '* * * * * 0/5'\n", " elements:\n", " - text\n", " iterations: 4\n", " tasks:\n", " - __main__.Stream\n", " - upsert\n", " search:\n", " schedule:\n", " cron: '* * * * * 0/5'\n", " elements:\n", " - life on mars\n", " iterations: 4\n", " tasks:\n", " - action: search\n", " args: [3]\n", " task: console\n", "\"\"\"\n", "\n", "app = Application(workflow)\n", "app.wait()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "xpuwL9aCUJOd", "outputId": "d994aaaf-7315-4109-d024-3e09773cc538" }, "execution_count": 10, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "2022-02-03 02:13:55,789 [WARNING] _create_builder_config: Using custom data configuration default\n", "2022-02-03 02:13:55,797 [WARNING] download_and_prepare: Reusing dataset ag_news (/root/.cache/huggingface/datasets/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548)\n", "2022-02-03 02:13:55,808 [INFO] schedule: 'index' scheduler started with schedule * * * * * 0/5\n", "2022-02-03 02:13:55,808 [INFO] schedule: 'search' scheduler started with schedule * * * * * 0/5\n", "2022-02-03 02:13:55,810 [INFO] schedule: 'index' next run scheduled for 2022-02-03T02:14:00+00:00\n", "2022-02-03 02:13:55,814 [INFO] schedule: 'search' next run scheduled for 2022-02-03T02:14:00+00:00\n", "2022-02-03 02:14:00,001 [INFO] schedule: 'search' next run scheduled for 2022-02-03T02:14:05+00:00\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Inputs: [\n", " \"life on mars\"\n", "]\n", "Outputs: [\n", " null\n", "]\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "2022-02-03 
02:14:24,500 [INFO] schedule: 'index' next run scheduled for 2022-02-03T02:14:25+00:00\n", "2022-02-03 02:14:24,522 [INFO] schedule: 'search' next run scheduled for 2022-02-03T02:14:25+00:00\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Inputs: [\n", " \"life on mars\"\n", "]\n", "Outputs: [\n", " {\n", " \"id\": \"119\",\n", " \"text\": \"Life on Mars Likely, Scientist Claims (SPACE.com) SPACE.com - DENVER, COLORADO -- Those twin robots hard at work on Mars have transmitted teasing views that reinforce the prospect that microbial life may exist on the red planet.\",\n", " \"score\": 0.7236138582229614\n", " },\n", " {\n", " \"id\": \"271\",\n", " \"text\": \"Saturn's Moon Titan: Prebiotic Laboratory by Harry Bortman In this second and final part of the interview, Lunine explains how Huygens may help scientists understand the origin of life on Earth, even if it doesn't detect life on Titan. Astrobiology Magazine -- Titan is the only moon in our solar system with an atmosphere, and it is the organic chemistry that has been detected in that atmosphere that has sparked the imagination of planetary scientists like Lunine...\",\n", " \"score\": 0.4750666916370392\n", " },\n", " {\n", " \"id\": \"1132\",\n", " \"text\": \"Is Mercury the Incredible Shrinking Planet? 
MESSENGER Spacecraft May Find Out (SPACE.com) SPACE.com - With a new spacecraft bound for Mercury, that tiny planet nbsp;near the heart of the solar system, researchers are hoping to solve a slew of riddles about the small world.\",\n", " \"score\": 0.47124743461608887\n", " }\n", "]\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "2022-02-03 02:14:25,496 [INFO] schedule: 'search' next run scheduled for 2022-02-03T02:14:30+00:00\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Inputs: [\n", " \"life on mars\"\n", "]\n", "Outputs: [\n", " {\n", " \"id\": \"119\",\n", " \"text\": \"Life on Mars Likely, Scientist Claims (SPACE.com) SPACE.com - DENVER, COLORADO -- Those twin robots hard at work on Mars have transmitted teasing views that reinforce the prospect that microbial life may exist on the red planet.\",\n", " \"score\": 0.7236138582229614\n", " },\n", " {\n", " \"id\": \"271\",\n", " \"text\": \"Saturn's Moon Titan: Prebiotic Laboratory by Harry Bortman In this second and final part of the interview, Lunine explains how Huygens may help scientists understand the origin of life on Earth, even if it doesn't detect life on Titan. Astrobiology Magazine -- Titan is the only moon in our solar system with an atmosphere, and it is the organic chemistry that has been detected in that atmosphere that has sparked the imagination of planetary scientists like Lunine...\",\n", " \"score\": 0.4750666916370392\n", " },\n", " {\n", " \"id\": \"1132\",\n", " \"text\": \"Is Mercury the Incredible Shrinking Planet? 
MESSENGER Spacecraft May Find Out (SPACE.com) SPACE.com - With a new spacecraft bound for Mercury, that tiny planet nbsp;near the heart of the solar system, researchers are hoping to solve a slew of riddles about the small world.\",\n", " \"score\": 0.47124743461608887\n", " }\n", "]\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "2022-02-03 02:14:50,112 [INFO] schedule: 'index' next run scheduled for 2022-02-03T02:14:55+00:00\n", "2022-02-03 02:14:50,138 [INFO] schedule: 'search' max iterations (4) reached\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Inputs: [\n", " \"life on mars\"\n", "]\n", "Outputs: [\n", " {\n", " \"id\": \"119\",\n", " \"text\": \"Life on Mars Likely, Scientist Claims (SPACE.com) SPACE.com - DENVER, COLORADO -- Those twin robots hard at work on Mars have transmitted teasing views that reinforce the prospect that microbial life may exist on the red planet.\",\n", " \"score\": 0.7236138582229614\n", " },\n", " {\n", " \"id\": \"3300\",\n", " \"text\": \"Mars Hills, Crater Yield Evidence of Flowing Water LOS ANGELES (Reuters) - The hills of Mars yielded more tantalizing clues about how water shaped the Red Planet in tests by NASA #39;s robotic geologist, Spirit, while its twin, Opportunity, observed the deep crater it climbed into two months ...\",\n", " \"score\": 0.6666488647460938\n", " },\n", " {\n", " \"id\": \"4201\",\n", " \"text\": \"Martian hill shows signs of ancient water LOS ANGELES - NASA #39;s Spirit rover has found more evidence of past water on the hills of Mars, while its twin, Opportunity, has observed a field of dunes inside a crater. 
\",\n", " \"score\": 0.6453495621681213\n", " }\n", "]\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "2022-02-03 02:15:18,333 [INFO] schedule: 'index' next run scheduled for 2022-02-03T02:15:20+00:00\n", "2022-02-03 02:15:44,592 [INFO] schedule: 'index' max iterations (4) reached\n" ] } ] }, { "cell_type": "markdown", "source": [ "The workflow above runs up to every 5 seconds. Note that since the index job takes longer than 5 seconds, the time difference between jobs is longer.\n", "\n", "The index job loads the next batch of data and the search job runs a recurring search. \n", "\n", "See how the search results change over time as more relevant results are found." ], "metadata": { "id": "o9EX2NgxV23x" } }, { "cell_type": "markdown", "source": [ "# Wrapping up\n", "\n", "This notebook covered how to use workflow scheduling with txtai. While there are existing ways to schedule jobs (system cron, serverless, and so on), this is another easy and quick way to do it. " ], "metadata": { "id": "Fr99QHPtTMJt" } } ] } ================================================ FILE: examples/28_Push_notifications_with_workflows.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "28 - Push notifications with workflows", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "LjmhJ4ad9kBL" }, "source": [ "# Push notifications with workflows\n", "\n", "Workflows are a simple yet powerful construct that takes a callable and returns elements. They are streaming and work on data in batches, allowing large volumes of data to be processed efficiently.\n", "\n", "This notebook will show how workflows can be used to push notifications upon certain event triggers. Using this method, an activity feed of content can be created." 
] }, { "cell_type": "markdown", "metadata": { "id": "8tLWvo9v-Q0u" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ] }, { "cell_type": "code", "metadata": { "id": "Fa5BCjMFqVKE" }, "source": [ "%%capture\n", "!pip install datasets git+https://github.com/neuml/txtai#egg=txtai[pipeline,workflow]" ], "execution_count": 1, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Create workflow notification action\n", "\n", "Workflows run a series of tasks to transform and process data. This section creates a callable object that can be used as a workflow action. \n", "\n", "The action below pushes events to [Slack](https://slack.com). While Slack is used here, any notification service can easily be substituted in ([Zapier](https://zapier.com/), [IFTTT](https://ifttt.com/) etc).\n", "\n", "It is assumed there is a Slack workspace and application installed and ready to use. [See this comprehensive example](https://api.slack.com/tutorials/tracks/posting-messages-with-curl) for more information on setting up a new Slack app and posting messages via the API.\n", "\n", "The channel id can be found from the Slack web interface. Log into Slack and click on the channel where messages will be posted. The `channel id` is the last part of the URL.\n", "\n", "```\n", "https://app.slack.com/client/{workspace id}/{channel id}\n", "```" ], "metadata": { "id": "EJ_hHmQtRgQM" } }, { "cell_type": "code", "metadata": { "id": "3hYRk9JnsM0J" }, "source": [ "import logging\n", "import requests\n", "\n", "# Uncomment and set. The following are dummy parameters. 
Your parameters should not be publicly shared!\n", "# AUTH = \"xoxb-not-a-real-token-this-will-not-work\"\n", "# CHANNEL = \"C0XXXXXXXXX\"\n", "# URL = \"https://slack.com/api/chat.postMessage\"\n", "\n", "# Logging configuration\n", "logger = logging.getLogger(__name__)\n", "logger.setLevel(logging.INFO)\n", "\n", "class Slack:\n", " def __init__(self):\n", " self.alerts = set()\n", "\n", " def __call__(self, elements):\n", " for alert in elements:\n", " uid, text, _ = self.extract(alert)\n", " if uid not in self.alerts:\n", " logger.info(\"Sending alert: %s\", alert)\n", " self.alerts.add(uid)\n", "\n", " headers = {\n", " \"Content-type\": \"application/json\",\n", " \"Authorization\": f\"Bearer {AUTH}\"\n", " }\n", "\n", " response = requests.post(URL, headers=headers, json={\n", " \"channel\": CHANNEL,\n", " \"text\": f\"{text} {uid}\"\n", " }).json()\n", "\n", " if not response[\"ok\"]:\n", " logger.error(response)\n", "\n", " return elements\n", "\n", " def extract(self, alert):\n", " if isinstance(alert, dict):\n", " return (alert[\"id\"], alert[\"text\"], None)\n", "\n", " return alert\n" ], "execution_count": 6, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Build a semantic notification workflow\n", "\n", "Next we'll create a notification workflow. The example below indexes the top trending Hacker News articles and pushes an alert when an article matches an embeddings query for `software development library`." 
], "metadata": { "id": "_B4YFu-1R2QC" } }, { "cell_type": "code", "source": [ "from txtai.app import Application\n", "\n", "workflow = \"\"\"\n", "writable: true\n", "\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", " content: true\n", "\n", "tabular:\n", " idcolumn: url\n", " textcolumns:\n", " - title\n", "\n", "__main__.Slack:\n", "\n", "workflow:\n", " index:\n", " schedule:\n", " cron: \"* * * * * 0/5\"\n", " elements:\n", " - front_page\n", " iterations: 1\n", " tasks:\n", " - batch: false\n", " extract:\n", " - hits\n", " method: get\n", " params:\n", " tags: null\n", " task: service\n", " url: https://hn.algolia.com/api/v1/search?hitsPerPage=50\n", " - action: tabular\n", " - action: upsert\n", " alert:\n", " schedule:\n", " cron: 0/1 * * * *\n", " elements:\n", " - select id, text, score from txtai where similar('software development library') and score >= 0.4 and id like 'http%'\n", " iterations: 1\n", " tasks:\n", " - action: search\n", " - action: __main__.Slack\n", "\"\"\"\n", "\n", "app = Application(workflow)\n", "app.wait()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "DWEdyXUgIKqW", "outputId": "c00e2949-db6f-4b4e-e514-7f0c1fcb14b1" }, "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "2022-02-10 15:12:42,838 [INFO] schedule: 'index' scheduler started with schedule * * * * * 0/5\n", "2022-02-10 15:12:42,839 [INFO] schedule: 'alert' scheduler started with schedule 0/1 * * * *\n", "2022-02-10 15:12:42,843 [INFO] schedule: 'index' next run scheduled for 2022-02-10T15:12:45+00:00\n", "2022-02-10 15:12:42,851 [INFO] schedule: 'alert' next run scheduled for 2022-02-10T15:13:00+00:00\n", "2022-02-10 15:12:45,884 [INFO] schedule: 'index' max iterations (1) reached\n", "2022-02-10 15:13:00,042 [INFO] __call__: Sending alert: {'id': 'https://datastation.multiprocess.io/blog/2022-02-08-the-world-of-postgresql-wire-compatibility.html', 'text': 'The world of 
PostgreSQL wire compatibility', 'score': 0.40123000741004944}\n", "2022-02-10 15:13:00,254 [INFO] schedule: 'alert' max iterations (1) reached\n" ] } ] }, { "cell_type": "markdown", "source": [ "The log above shows the indexing and alerting jobs each ran once. There was a single match and it was sent to Slack. The score threshold of 0.4 is relatively low, it can be raised if more strict matches are desired.\n", "\n", "![3.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAs4AAABOCAMAAAAHHWDvAAAC/VBMVEU0CB0wFSIbGy0XHDwyGAwhHRwgHSAgHykuHCQ0HBwwIBkvISstIyAsJCklJzEyI0hNHxxNHyshKVspKy0uKilHIjUrLCoqKkpEI0IkKmc/JEtEJyU5JltFJE4oL0AqMTQiL3QsNS8nNlBcKh5TKFk2NTBFMiU7Nz0gQFEkQjNxLSRZNSNqLzRTOSopQnF3MR8hRH5uMVQkRI0hTF0eUztZP19dRDQYV3FVTCo6T24kVKCJQiNFUn8wVp1VVC2DRi5OU1hyTTQMapFbV1QgYq9AXYgVcEYgZaQTaq0dbL8lbbg2aqM6aK0nbqxpZS0Be6o9abihVytTa14Qd81Oa4tKarI0c6hSbKggeruoXShdaqg/dLeeYjUgfcd9bTE2d8Mgfc1pbnVwbWosfLOKZmGHZ3dLdqmuYyWNakxicKaEaIlucKoDmVZ+dyweh9Vsc6R+b55yc5xbe6cSkNxBh9Iwjct9eai9cC0yjtMAq15ihrB6g4GRhzB5hamGiFtyh7MenuWLgqihgH2hhWU/m9I6ndpblM24g1K1hGPLgTqvh3RRnOGci6eDk7Nzmsammi6kkaaXla6Kma8AymmqkqI/se5cqu1jq99NsuN2tEZgr9Xclkx8qdfJm2O3n6uFr6atordOvfTHooO7rS/PoXWaqs6KsNBkuu2jq8CsromNtr9Vxvl0vvdwwuuCvtnrqF2Mu+ivtL3Br7u4tK7QrrFmz/2EyfmsxpvYtre9v9zmuoGVyvSB0fLZxjeF0Pmlyet31vvhu7b3u2/Mw8mzzOOM2Oyl0efmwbzoxpaV2Pqc2O3Lzcqv1dmF4v6T3vrhz63Y0tb6z4TI1970zML40Y7o0svY2Mfy08aY7f7H4PSl6v225fv83JS07f7h49/63c/G7dvC7Prb6OnV6vTp5tvj6Oyx9v/05dz957T66Nb+7Kn868+//P/98MLK+/7Z+ejT+/318/Hd+//9+MD9+Nr9+c//+eTp/v31/Ozt//j+++zx/v/+/vT6///////s9sLcAAAAAWJLR0QAiAUdSAAAAAlwSFlzAAALEwAACxMBAJqcGAAAAAd0SU1FB+YCChEmKYU8MzQAAAAZdEVYdENvbW1lbnQAQ3JlYXRlZCB3aXRoIEdJTVBXgQ4XAAAZNklEQVR42u2dC1xUZd7HvSCjpGYOTflaKhbe0uxV39R800STu1hewhRRLNJVY1NRNN3FckxDDAvU9cLrhbL1kulKS+qrKAYq3vC6gjgLxE2UHQaYcefyfPb/f57nnDkDDOBaq+4+f4xhznlu55zv+T//5znP79SECBP2b2NNxCkQJnAWJkzgLEyYwFmYMIGzMIGzMGECZ2HCBM7ChAmchQlrCOfMsM5q2TRqd427u0ajcVfjB/znGZb5T1dTcatuqxBXQNivg3Ompwbx1QDJwLCGGWO6TRt3xLq7zHNl3+Z74MO60DXU5ljg3WHPnCbmpD6qGOWOW85MXAFhvw7OYWq
NqlkzlUqtUjVTadTMOyPXTZu28WjZ0gMoD7Pj7Nr+dE2cS95bYSNZrVxjyD431RvfE4GzsIeHM9A8aMeOJXOWfLHkC5VKwx21u6bJgEWLRn3Vswn87anA2fWVvBo472sBX6pWfpRjXdhipIXUxPnajIhLt35eOeKNabH5SSNGvBF7SeAs7FfEudmO0UPdhw4apBn9RXMMmdXuSPPs2aN6Nh0we1RTiDyUOLsGmCjO1lPDW3cal1412c2VWvOfJsJv1dtu4L+r32JBB+J8alrYN4Dz4lun3v8maebZC++tKVTgbN4eoSfEuD1s2tcmxQbz9rCw2AK2YeVMA3xs+T3dX/L+YXH1hDnHWePebA7QvGR0t6GeO1Qtn2hJrcmAr3oOaDnAAz5aatztODd7u5VqhQ1wJvtaqd7o06L9maSXXLuGhc1o1fzoqeGuXaftGuI635bVsUMu984/r1yTtDgfcC689t5mwPnWysX5dpzLPv94gZ5Yk9ZYjCs32+QN5NDvLeYkuoGU/OYTJHjLNAxjjJ+HCZyF1YezptmSQYOWjPac4znoD4tmgy1C+2rUKI8BA0YNGIXuWYHz3oUtmu8FnKuHuE6ywK+Qex9g5HGzXfOj4LQh2NjUokv5KtcQC8f5wm/Owj/mnb+r5Z3Lfiz5rZ5Uv58D2C7QA9VsA7l+Axw0DcOtSZsPoWNO2rUsj5BTsSsFzsLqxVk1Z+iSOao5g4YO+kPPAWg9e/b0GNWyJ9I8YNRsR5z3VL3l2n6Qa+jtF1TrIIhu8drfauBc0qvZrr7NT0uxc9KIsLARayB2HjHt67piZ6S35LflhNz9TZ7uBt8AdiqMRRd338+htCcdBtirPskTOAtryDtPmdNsyujRQ0cv6dgUrWXTlk1GeYBnBq4HLKqBMyl5AWJk5zhbV7XwcBtp4jhfeO+bW7cOfXQWgg1kG4KNGjMbCpxz5Q00ZmbBBoQdEIvYAOeqT9KTNlsFzsIawHnOHNXQ0aMHzVF90QwnNjw8NB4tB3zlgc55wKJRTT0ccSaHOrrSYCOUBhvWGjiTmy+4YiqGc9JHZynT9eIsBRvyBnMajP6uoHuu+nzaxx+HfZQHOJNT05YZBM7CGpjZaD5njueUKd2A5h3NPXHeGWeem8xeNArGgrMXNXGY2UBQrZta8qFgtxbtc8AbdxqXo8DZvLBFP719oq6BeWeklw8FSekNecNmPjY8FQFgG1d+jzibtx8jAmdhDeCsVi3Z8cUO+Pf/nvgchT0VfKINTjt/NbuJuhbOxDi5hTxRB/wNb9N1swJn40RVDGkkztWfhE37eLONTdQh1XxD2eds5s68ko4HAeokhrHAWVj9wYbavY2qrdvgCS01zYFm9oQbflq2gWHgqCb47Ft9X2VLs3REPBUU9jC8s8a9c6v+QS5t8dm25J3hB4aETTV0Jcf9FA3OOUp6ZCiWIAn71+Osdu/sMjDIRe2OK+pwGR1FXOOhoX/DN3HChD0uwQauA1VznNniOu6d2VIk4FstTpiwxwVnBBa8sxfgrJFwpkQj0GoKtDhhwh6XYIN7Zy8abPAVz+50vo6troNPccKEPT7BhlrC2dE7u7uzKFrgLOwxwhlDZBevmjgrTZwwYY/XUFDgLOzfzjuzH4GzsMcWZ3xOom7n1b9dZ7W78M7CHnvvrG7t1inIq7OLG38i6AznqreYjsrV78p/nW58ZdaFXEJ44rlsxz2H+qi4MtzVtdO4nFo5S47ZnJRZNqN128HHCMqwXoW/1liwlH6GOlJuap/9gCfLhkceYMjix2yc+Lr+5rNH8YNUbTU5JD0+9g77o/g05Lrq/CTp5uutccn07KzWygdpPmaqdfJ+1Ds2hVyfdKTxbU+cZ/kncjm0dFJyzZZLR2lesM3ZwZHiWO/A5Xnw5eJi7+C9eHkOhPsEH8Ulat9O8Ik8Lx9fWrh35HlooZyBFQ2JliMQsNlnuf5+cFa31ngtDwpaHtTZTe1eJ89S1VczM7c8+U1
m5o2sXwTn271X5NtoglfOXvzj8Fdya6EY6gTn6iEv7/rTjNcNpGpys3H7/zSjVYDh18LZPHEd4DzZAWcDw9lAsvrp68Y5BRlNTLA5xWwbKQ29UxNnXWitC2d89y8KTnD/1Unn7g/n+8/liHNqzZY3hDMeXERk+qW58wzkesD6G/sDttnIQb+9l74NSIXjDTlzaa2vdK3pZr9z9gwMmdUh6Zdip+YSY4S24NbSKFPjcda4t/bq0V/j7hIUSXmuP3Y+8SRe1F8G56znz/MECOLNdjG1MjrDOYu2AnZueoausLvSbr7NCc4PatVD1vE6FccMONPPfc5wTgRGraud4mx8t4gcZHuVOF+eWgvnsjEKnOvY3wic7z9XPYYtbwBnmmRqOfQIvn+xro4GFFPGFhkjIK15dbRNNwYueuk7u1na0ndgszVunknKwDZfHwmJjBEJtsu+RYrNjcDZXe02sP/AHv1b9+8RNNCt7tmNOnD+v+GtBwNJZSu7tX2TRgm3n/qB793XRW9O6uPadbOFZD335+EdihDnkuGtu/6O41y1spsKMp9wc2UL/RmIlX3n28pmuNGg4xSkXlM9pIWraz8D/A2xSExl713Dmu3h9WU9+wMtqbJvKL1TrKs6FNlxrh4SZQPXDxheef78vi5FLCu2afBhhk4ZtuCY1BCyyX9Lx67fH3q17Zt5tMktusLGuzO6dQo33HyhBY2vQm3SMcPdaQKc4ePvC6Fl7X+HCwjNC8dgQ46Hnp3rE5lnXubNLdo460isf+BOCzHH+kOvaV6mxXQZ0RbjrHO0A16+VGszb4X+N5ckQgbfouIv/X3WG+i+yOwMf9iWTNLmegefJ4mBtMyRcHFxA3ThujFn6R7qRn3Phvts+Pn3+L3qXYgQ8BfgbM8VtwHq0RrI8ai08HmG4ljoz7F3h+4eogDs9YOP2EgxFLDTxGqnwI0/J1fHWg4441HmMpyvLvYOhObiJxjeopikAmX4gGHpeAxVyuB3IQYcgHNFgQWbxnGmuOJvKQP32R/ipUz80MBxLrofnL0Gqge2GthfPTDSRVPn1EZtnDu+fPLa5H566HFvGD+jLqrqLXSQbWJs5omhZN8z39w61DHGlvV093GxJsD5bl//Sxcms37fOPHl9J8/e/Kw7O8YiLfbxdwd5n/p2uRX8qqHhOdf+NpknAje+W6vFZZTL4QXVPbqPu7LfF5f9ZBnFp+EM5f19B6pWT8ovPMq+Gtf69dNZFM/PeKMWfM2vZxuPsRqpC3Y/r3ckE3NVvx9X/f/zSkZFmKDQ0u/NrlDrnFi+KVT0GHc7rUOMwDOHQefxB12nGn5pKRXDN7MtCHHfbR5tyK0FmtctI2Yl8HFNUb47c0/AJ1sYlRexY8cZ/OyVHIdrlgK7Evz15KU4PRbS+eZSAr4UWucNi/TfxvJ8DtTkZbDrrBu0pH8teAScT/dkuJ3BDJus+m8g9ML1zLvq/PWGorfmX7eunWqXomzPVe8z/qCNP8EcjwweONh80dReZC1nDbi4vemlNAztrSAbPMC6N2PmXjtHGepOtZydpRL5xkQZ13ABogGok3gfS3FM7UFPAnrGqbqr4/EGMc4axv3xTwOv+7L++mUseXY9JHnpAwMCDx9tK8zztxgqli2wXI/OAd1Hti2h1cPtVeki7qROGNne+K581eeh2+VvamrhEtb2ft3rxlu995TPWQ+BgJdyrMAaRps7EOSebBxExk0TvSzOOCMsO9DR1fSax14VhthEJGbL2YT6wfzbZV9AR+5vrJP+6hePoYU80jg6T0KnLOeyzZPfPvFXONb8wnijFkr+67DQJguXr3J7gK5IdBSwDYG7wM9PbTb7daRCmj5ByMtdpzpjt7rauJshUz0DynYODhVr8Q5Aa+wFmIKiyIGLrfG7QbvlmBjwQb6NAlX/BI/z3KQXVrmsGCTDrHiYBoxI6JAOeA+DXtxcH8W9HVOcMZxIZR73BdyZaDLM767mzYCe3b0l3EJsAW
/HpQDFMBZro62nB8lFIg4J2K4C9CWYlAUj+loErSrcBNfpm1jBQClUewKmSMkQFk9pQGpUgaiCMAyfO+Qq9A9hZwn94GzS5CXW2evHp37D3T2KKXu2Bl+7VN5dO/u2X0Pj4Sz+lW+mH2iSxHzVUAvTQcX3oaXXMKZQkvJkXHGF84AnateM6CbD7m36onBX1uI7J1Leu8hlYiboj7rhWFdcp14Z0C38r//OmTP7aeOUpwx6812nbtDXrzPeAvsDUGcKxHbTbxR1X3Zmu1Vr5nsONMdQ+bXxJlcefqH6iHs3Xz0Qh8fW67EeRsNYU1X/YM3SgN3GCOWjikiZeispNj5sq+EM17leabSmcHrcxT9b9n4VBnMMtqFAyo0ENWNOSfjjPVCAic4o88DRmgrE7E/h8pZOACOM3j69OkTEsgBn8jvTITXznCWq+OjW5q/dHwq4EyPEWti3jmZ2AfAxe/stRGld7YmhjDQzUu1JoIB2Vi90jvTDHRzueyddZM2FsDAMZttbpx37hHU1qtT6/6dN/RwkV4g2kicT3QoUo6ZVs03vrV+1RhLXTgb6sP5lbMXCzECZjgDxBc+7YahDA4FT7Tx7LTGwnBW1oddRGWvECl2zlXgbF3od+J106rQE4CphDON7bk1iHNl3xhb2acjPN1G1sAZdtTCGRp8hY8K6sGZVHw71y+bj5WySQo461IJZ/P+xRMmSDgXfwtfIL05LTYgmeOctiZ8wshfFWe8W/gc47fhH+p57TVxxpbzo9QFnFPiTDICJ0DMTXgShBMGDIRlpXUAzbkSzXihCgsKCixy7CxloJttiTx21sfjpIZx5jaWunETdS7q6ITBQT02BDl7zu0U55uSd6RIjYMoYNO4YevAuUnBBscZuvMiRbDxQx3BhswXjUWps6c4Vw9ZT+925mJ5fVQkC8GOdZU8s2FRzmyc6DAFAvcOUyC/hDMLgJhdkYKNH+zBhgPON5/ac7v34oLa3hl21MIZmj2Fz904wZlddN6hA1FRJvOCc4QFIkCUdXVUjkXyzqUzNxSgd8aEUIoOcT4Ykm5Remep97/jFOdZwE9ZnTgDKbSVLNiYuZsHAjzmYLEQK5C5SEWwcQdbLh0l5GfBhokGG+YFyTbp4BjN2/CUmDH6ISlQl4LmaPsMFJ3iwJkNKYMUW7OZDRKH8ZFRGjg2bqJO7ebWPyjIq78LXRda2zt7OsXZ+AGOqfgbMk64AxU3qVJwkzQUlHC++dSK/AvDFUPBLU8eJbVwxqFg6eQueXfjb1i3dCm3fvDKtR+NC13V6k6LTZRJqb5VXWPTtr8UYOLzzp92pPPOL+/PzEy7wWa0nzkNN1XbPUTGGYen+RfeOyy3IGmF3BAHnJ8Kh0ErYn2elAx7zVTZd37pDTYUZDsccG6/64AJ4iHpTQwSziQ+5OxJ6+rIS+nGiA9zCrf6nrf+eNpyFRzasgSLdRk4XRpWJIaeqTjgrzXOgjh6K+Cc4fvTyWvAUjGMC68eNhV/hEOtvRfz4oCJNAgsM3yzC03SUDAzfJtNwtkaF2NQ4gw3iKF4rQ/DWc4VjyO6gN2sleYILQ4Fc2lZF1foM3BEGHve/HUOSQu9w2rHYqWhIFaHLacD3r0FF+dqLcqhoDXROzAweL2JJykO16afPHkS5503XNoPo0hrot93sOGGxbwsBPfk1Zh3ljMo552LSEbA3oJC+yx1wziH0Xc4u7i4uLlonDxFCXOKM5vq2sxuK+j50Zm+hgpsnKiDCEHGmRzq0/bNP7+onKiz1caZlMxojfNwVWwejZx6qVN4yf+k3/p5y9N7GJO8PuPKPq6dFmMm4/ZXW6s8nljPI3BXVxrEmj+gYzQMTWScaZsWG/hEnVvbN9Plhjjg/OzbfWgbPuv+xtwtr5usWzp2WnEPcX7uj3SHEue7w3Bujw4GHXHWzfVZZ7oaHrjzXsTyxd7BR6S5LvMCrUUXeofE0aG+eS1sO6AlaeHTV5wdcwccl3dU+dYJ05f
vjzJdXexN5+u2egef182dHvndglTY7xOVi2DSWTU6USfhHKVX4kyn235awHCWc8VHxnoH77TweePiWH+cbWOP607b6HzdegOdMzxPWO1YLOIsV8ee/ByP2u/vs9wgT9RhO0snnSkshFuOJbGuZvOUu/lTQRO5zjZM1afwGUzeF9AZwSM2RQb+VBBqQIjT5gb6RJ5p/FAwky5BYq/er3vNhmcmeYi2Cvvxyt576n1yt/CBn2TbI/L7eUIkPfyPcvr4j8XODnZwg610fNFDOJXx0bYHKwBa7nQXBhXGWan1JPmXrNkgmdPYSn2qDaztnR/kfybxS9iJZ76zlH7Wr/4HW3f7vm54eDhnPe/8ZqoD54dmD4xzPXbd74il4sDU8oe9BOlRN3PSq63rWp7kaFc6xtgeFs78ieB/Ns4YTPnYlxAJnIUJEzgLEyZwFiZwFiZM4CxMmMBZmDCBszCBszBh/3k4U6mxLIhrZB5JUZZSS7NmXuv9Yf1P86giuhFGJUA1raYI+Z8xVGCsjqLre9CscfNxFQN+1NRdyw9MHMXXxLlEVNhDxpmKgn85nA+G5DYgm0nRPhDOqQ96ouJ3MxFFXTjX0F3LODuIrwXOjzDOVBT8y+Ec1yCsiQ+C84ObeRlf4iXhrKyrhkJaxtlBfC1wfnRxZlJjLldmSwzX82CBCgoOov4lMdrCVxkyyfDf6DLCud6RX8qXn6l/qd6ZBxuSyJhI+mIqKL5HU+i5SNg466dYn1SueiZp4T4oPMaCd03Vyzijdo2uGTcv2I0boeDABKyQK5GRLizhDJEbGbk/PPgIlpOHami2nJPJqUvDpdWMujE/LfUOPk11JVgsfNCTsYuqKRLpcrKIjWv9oRYmvlbqqgXOj6h3pg5WkiubI9YXFK/lik7ju8ngy3zOEePMZGtcyJn8/X7nmGT4Hl/kfXGuhLO85DtOXhPDRcY2SV/MBcU0hSQSNkYELv8mLy4qD1dulY7fa7l42iYVLOOMIvbSd8YWgT/Nxo3xEyK/O60L2Gv5eSZ3tNi6wv1H7Y302WDKCIzKKV6qtegCgs/kr/UtkuTUVECPjdAFRKXTZeR2nOnJQN0P1JdKvbPfkfwDvtlcfK3QVQucH2mcuVw5g0p7+TsQUPKmC/0ywQY8laLuDEXFVDJMJTjohOVgQ1L/KnHmImNJX8wFxTSFJBI2RkSbmPDHOCuVS9ulgmWcjbOSyfHlEakk5UMqpIjHNYvxVOHD36VTylRx9kbC4VS9uw1f7qDXBUAxVJHK5NR2nFGciUqgGjhTLf1xemA02MC7muGs0FULnB95nI+P1Sf6TAcLLbpMVQSwIUV7far+YJSJ6RlRAU9VPCiQxBAYMtOkslySwspyS6pMSZDJBcUK75zMeLlMKw1MNa/2WX7aIheMiB2Ezh/41VpXJx/U4iuIKM5UtBeIKuUoA03BWkccGmlE1w3F0BCZq+dQsGfHme+ohbMuIFVWAiLOs3YrcObaEIHz44Az12eSinwU2ZaO/8uyZBjUw+CpDpwTGHU0qSPOLHdNnG1MUMz8NxcJM5zlN+Jc/NJbe08qGBGroGLfjLF/HV90PfSvsMGOM5d1VtjFw/XhHJFg43LqGjhH1MYZtdBjsonA+XHHuTzD4V1L5mUbx0DMuREiA/oWEKkfp8FGXJRF8SoSSf2rDDa4yFjSF7PZBJZCEglTXkrH2yff4JZCDTAWbJ/ZKBu/K8pknLULRdEcZxbdyNN37OUkDo10wLl0fLIkp3bEGeuuiTPcg19GWxxw1gmcHxOcUWpskHA2L4jMKdwfI5FyMBAua0YguFc62DqAoywJ5wy/VEv9Q0EuMpb0xVxQzBTRXCRMebEmwgju4ooi3eYCM7CM+uO0cCXO1tWBkCwxEByyjDNWWPjtTptiKLjToZEyzt4wAo2dWq7jcmrwxxU32FCQ7VDiTE8GUOwD90VKSJEdZxRfC5wfA5xRapwry5WpFvi03esl42gJ+3U6B3bEZsf
ZesDfZ3macqKOqpQVOHORsaQvlmbRUBF9i4uE2cQu7gnea8F02jxa8Pr9SpxJCg4SL49MJXacocJAu/KHqoSzHRop4xywMRyrt3I5NUnxD9zJvPMuukOJMz0ZOBgsx7y5Ms5MfC1wfvRx/tWsPhmbJBKuPwT6RUz5tKSRVs9rb4UJnGtbgyLhh4qzbky24Eng3HicGxQJP1ScE6NNgieBszBhAmdhwgTOwgTOwoQJnIUJEzgLEyZwFiZM4CxM4CxM2KNo/wBlU0k6/ty2uAAAAABJRU5ErkJggg==)" ], "metadata": { "id": "GsDzX_K8Qh0d" } }, { "cell_type": "markdown", "source": [ "# Build a notification workflow using SQL\n", "\n", "The next example is similar but it instead runs a SQL search using another column.\n", "\n", "This workflow indexes the top trending sports events as identified by [neuspo](https://neuspo.com). An alert is generated when an excitement factor of 40 or above is met (the field for excitement is called `weight`). " ], "metadata": { "id": "HqU5KNZwWMnR" } }, { "cell_type": "code", "source": [ "from txtai.app import Application\n", "\n", "workflow = \"\"\"\n", "writable: true\n", "\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", " content: true\n", "\n", "tabular:\n", " idcolumn: url\n", " textcolumns:\n", " - summary\n", " content: true\n", "\n", "__main__.Slack:\n", "\n", "workflow:\n", " index:\n", " schedule:\n", " cron: \"* * * * * 0/5\"\n", " elements:\n", " - 10\n", " iterations: 1\n", " tasks:\n", " - batch: false\n", " extract:\n", " - rows\n", " method: get\n", " params:\n", " size: null\n", " task: service\n", " url: https://neuspo.com/data/articles/list?category=Top&from=0\n", " - action: tabular\n", " - action: upsert\n", " alert:\n", " schedule:\n", " cron: 0/1 * * * *\n", " elements:\n", " - select 'https://neuspo.com' || id id, text from txtai where weight >= 40\n", " iterations: 1\n", " tasks:\n", " - action: search\n", " - action: __main__.Slack\n", "\"\"\"\n", "\n", "app = Application(workflow)\n", "app.wait()" ], "metadata": { "id": "1oZag3tKWkfe", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "81f5fbea-6585-4afb-c2d9-1e5de3e17f40" }, "execution_count": 6, "outputs": [ { "output_type": 
"stream", "name": "stderr", "text": [ "2022-02-10 15:16:49,702 [INFO] schedule: 'index' scheduler started with schedule * * * * * 0/5\n", "2022-02-10 15:16:49,704 [INFO] schedule: 'index' next run scheduled for 2022-02-10T15:16:50+00:00\n", "2022-02-10 15:16:49,704 [INFO] schedule: 'alert' scheduler started with schedule 0/1 * * * *\n", "2022-02-10 15:16:49,714 [INFO] schedule: 'alert' next run scheduled for 2022-02-10T15:17:00+00:00\n", "2022-02-10 15:16:50,474 [INFO] schedule: 'index' max iterations (1) reached\n", "2022-02-10 15:17:00,010 [INFO] __call__: Sending alert: {'id': 'https://neuspo.com/stream/be15c852925b53639b63feb7169a2842', 'text': 'Islanders 6, Canucks 3: Five-goal first period keys Islanders win in first game post-'}\n", "2022-02-10 15:17:00,215 [INFO] schedule: 'alert' max iterations (1) reached\n" ] } ] }, { "cell_type": "markdown", "source": [ "And the notification in Slack for this job.\n", "\n", "![](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAnIAAADJCAMAAACQYi84AAAC/VBMVEUzFCIyGAwfHSAYHT4fHigeHjIlHx4tHiY4HR0zIx8hJFhMHyIQLGgpKi4yJGAKMVouKik0KSIrLCo4JFojKW5EJUI/Jk0uLE5GKCciL14kMUJNJTpTKDAsNS8mN1JVKFpqJyNcKyhGMyczOD07NzEzNWpqKi9hMCEkQjMgP29TNicnPIMLR4Z4Mh94MycfR480SGYeUzsoS3deQTQgVm6FPiNXTCxHUFckVaF7RitMUWNYUUguV545V4EUY4hvTjeSRyJaWCwQZakxXZ0JZ7oVZa9CXH8dZaMVcEYuZaUzZZsVbMBBY58VcLIhbbg4aLZMZok+bqZAbbKDXnE1c6hdbV2fXTAgeqwWe85Wa6ZkbHOqXyVza2RJc6N4by8vecUlfb6KZmGNaEphb6SvYx9sbqmBbZlsc6QUjNpZe6dneZxafMi8by0xjdGHgDBCiNF6erQ2jswLpltWh7o5kMSAfapqhq57gqomnuSzf1WOhaavgWC7gEhhlbyPi5CDj5TMgTtGn9Weki91lq2rh5yEkq+djotUnuSujHwwqu5pm9GBlryhkaj3gkKqk6M7su7Zk0perPBQsubnkFD4jFNqr9OeqHjQm3Coo7a4n62bqcOFsra7rS/UoWV0teiDstfDpZOSr9VsuPeprKzOpnVRwvaMtMvooliftJX4nWvPqIRnwO2issEU5352wOTMq7C6tMJozvyGyPqLz67durh+0/KH0fn1u3H2uJN41/y+xs2S0+XNxMib0PLnwpDtwnyozvPnwpy1zeO/y9mn0uiV2Pjsx4zM2knhzKLk0lXqyMKK4f36zIPW1LbQ2Mq43t33zcLF36Xt0b7d1NfR2uHo1c6a7f782I+l6v6t6P3b3dr83J773rO/6/zh4+D63c786jnS6PXe5+y49eGv9v/x5du+9P/X8en86dbC+v7P9v777tXj9tL/8a3+87j+7+rT/Pz+9sD99sng/P/n/PD9+NX6
9/z/+eLp/v3w/f//++z/+/n5/v/+/vT///9lhc9kAAAAAWJLR0QAiAUdSAAAAAlwSFlzAAALEwAACxMBAJqcGAAAAAd0SU1FB+YCChEkFZhlLTEAAAAZdEVYdENvbW1lbnQAQ3JlYXRlZCB3aXRoIEdJTVBXgQ4XAAAgAElEQVR42u2dC1xVxbrAdbdBMekapSamJ8Ue5DHNV3nOIW0n7lBB8JBvhMQOjyNyyWcoJRlHRUKkvAKVqKlclUS9+EK05NHxAhVHUEGBfeAK8UhBtrDZj7V+95vHWnttXgICks5nAXutNTPfzPzX930za82eXjwTJt0qvVgTMGHIMWHIMWHCkGPCkGPChCHHhCHHhAlDjglDjglDjgkThhyTRwW5G0usLEWxshxgNWCAlZXVAEv0C/2/5EaHi9EXNS961gOPM3I3ACxAzApoA86siBDuLPsMQOhZiszdHWO2B35xK2TuBtMMa6cMvsDrdr8q3yI9UdSSsB54nJFbYmklNzOTyy3lcjM52DtMG2KvV68+w/r0GQYkLjEiJxuS3hi5gvdDDXxWX9kW/oyF/E8neYYck9aRA+Imf/31ypUrP/30U7ncihq8AVa9Xv/ww3c/fKUX+luCnOyPhY2QizOHD5oNvoXcCvM5Wr4xcj8t88wtKtrw5zcWbS85AD9Dchlyjz1yZl8vffvptydPtlr66RMohANXCsQBcK8Adu/2sjJFTjZXjZHjLk21sHZO08x7WoblifOL4af8PQuwg3XTiINFyF1ctCQWkFt74/JfYw94/vLT+0FS5HRHvGp4XrN3yaJ9auMBzYZFixYtQWfgwAZPdGa3j5qY1Aus937vyFkNMFv59oC3Vy4d9fbTX8v7UAHYXnm9z+vD4FcfqwGSWO69vkATIMef6St/Y6T5kPzdr8qGL1kyue8Tpy9NkQ1f9NUkmb8hq2//fGrlSjfE7l5bAsgV/fQ+Qq7oI58SI3JVWz8BsLjd27WaDXsM4gEsB0KxJS0I/AeibPeiWPip2bqEIff7R87K7NPJkz9dOmrpqMkfGuW7d98d9vrr777+LjJzEuSOrjB/4hggVztJ5q5FP3SLkZe9PuKJ02D8wLFGmPev2ATnKHKX/5Z2+a9pxModamLlylMK/lbD1/4VAL25pmZ3qJYcwAOSzwrRL273notrGwC5rwLh88XtGxhyjwBy8pVg4+QrJ789+Z+vvI7klVdeGfZun1cQca+/+6EpcvvrpsmGTJa53x4h389zm8zH3muE3J3XzL4a80S6EMvtXrRkyZ+DWo7lEGEFf6sAxP5WeCWFHiCoYSNX+9dCTOTukwdCtXWf/cKQezSs3NKlZkvfXvr2258+1wtJH/j37jCwcMDe642R42++DDEbRW5FU+QAw2EW9g0UOfCmRUUHvH4BxwqCHGujEasEuTLxAP5ViT9eDNVyu8HF7j5Z937agX0cQ+6RQG7lSvnbS5dOXir/1AwNWIcNsxrW5/XvhiEjh8YPw0yR479/TiZxrNximZMUOYQknr3DyB3xAsguv3+oVeQExypBTrebZFG3YdHHHy/xKgTk+IuLAisZco/GiHXlylFLIZaTf/r1E6PQvByamUMjVhg/fPih6YgVIcdF9DEOH9LBqlk750uQ04Hpq2zzvBwijA4feIljvbmGGjkvGKiic4Ccbu8PPEPukUDOUr7y60+/hv/+dxSaCyZPH/r0QdNy333Yy7IJcrxmo7k4SQKMTOljHSpBTrNYvoVvI3K1W5cs+nifQXMETZKAA9UKB7aSKWXdhj0UvN3kAEPuUXCslgMs5fLe4+b3sUJPH8jTLswcDB3e7YWeg1m2K++svv3L+DZbOSaPpZWzGmDZd+L83vgZq2Dl4B8MInpZ4bPtyRqMnJ/waII91mfSPHIQv/UG5IAtxB9EchhDq2FW+G/4xBqMSWc6VvSOEkYOwTVAfJMEaMOP94FBS9ZgTDp1kgS5UmLlrATkMHUIOksMHWswJp3pWImV
e+st7FjpG3MD8FwJeasEfrMGY9K5jtVSQM7Uyg0YQKI6hhyTTkZugFVzyEmFNRiTzh8+MOSYPBQrR/4x5Jh0KXJ4Brj3WxN7W1kOYFaOSbdYOUsLC+vV8216W9AnDy0ix60YS1/Z5eOerWg25zPP5LVW8PdD90iWgFVtmGCOn9O2LtxyJ0PjY5emmluvUosfr4/Ab8P7Q/7taojaSVsEVUbK/BuVUhfV0ErSu681uv72Cztb0tZU4voJbXR94Olmr6BZ3U8iXsyXftTM+0uN6QUFy57GT8VNju0zdApDl37oOHKWFlZvBc+fH7zaxsJyQLPMdRpyN1/2lyzIKZgyfPO3e5f1y28/ctef90k68pyxz68/uSo5OTmpOKuDyOlW2KbUNy4ka3RNz0Yurgly6ka1m1ncpFYR7p2CnGbxzg4jZzXg6fkTJ1oN6D0/ADPXhVZOM+8dSZvUzbNF75pzRYZ2I8eteBMM0CYjEtef3N+hhhOQuztmSzM92ipyLXHSjcjdT64PSm/amCs6B7naSR1GboBl74kTJ1pPtJhoPX9ib8sB97Ny3IFXZeNiMXLg3IaHavm4mQdGyu2AngL4/B4gp9uNLjFwi1d/ZOFXu8zC2oe8/MZFvFgoNSK9BKW5IxNk4y7w3OKgjyzMwF1ibs88lYfcnXzGBdyJ3wy+oPkIPhE3rCpG9+tY9c3n95sgd33o6etDT2JcKnFiwWtDWnOZ7NkK3YEJsuGxWqFIihx+0xmdvvvSV1PMTmKd1ZssZDIMB1Rkw0gzN6jERVrjdw6MfPO3SVsMon+v2mBh/d6TRuRAW3pxHDLiuhVzJMoj5KCtrEMbEHLI+bmpja0oZkXSk1YUU9dO8jPwt0fsxDzFPVsmthm9EVEFzIeT6OWMBa6VvTbrqX9N7Z+Hi/xtEjTDWHQ5dCT8+cRp2hIoodzlt2UWODGtqEnlaV0JADdfhsTtgdcUubcmWk7sC4ZuYkBvq2aHrFLkrg/dVXRpH0LuzktBud8MOs3Hyd1SL7/srr07ZmbxT/OgOSNeTOO+GZTOLbcZ90nKineKq8IJclnPvTHqT9tFOxfxlHCzZw069PO8/mXccrOg3APP7RSR0yy2TSvduw914veooCFp6DVNKvqLz+/hbz6/R0BOr9cbEHKaxU5avm6aH5jU4qqNoyuFsKdQt3FImiFucGzRgee2GGiRgpW789oWbLnvvmbj/Ekx1TmCWjnQaxXo5WfIGhirvTxiPx9nOfyTfZDUcP35VTcuT5nbgBXd2EdEDmlLL77zAuR88/mTEuUBudpJq0quxCDkNPPcci+O2CK2opgVTY9b8Qdj6k3AS5w5GPmIsTUIOaHNBOTGDI/9eWO/dNqu6XAQkBtq4xvzGylSs5iCkjXoGPf9wFAtbYm7Y2zTaifZhBoinkkXyjapvFBXAoChvYbYFLn5lsDcW9aWbwWgF5jug9yZZ9IFx1pPHFLc6Ars5LC/AFruvgbK6Bb7cctHF4LLn6sVvf+QQz9LArBNzwrv1aGcoPW55XO0PGoSATlisFAn/jjwGLJqlVLfI3dpPHx4Uw3I8XGActagTOxV7rx00uhOrj9zq3YSKj5idAUtUnSsAnJj5jSIOhuRw2s6Rv+KYOYj3myIQ8uJICn0MahwZlD6dWRsjY4VaaujF9/bBMEIuGiJ8tBQt1/aaaCOFW4UbvEcrdCKQlZietSKkqpnPZOnW/zJH/I08/x5jBxtMwE5VBEBBxE5uMd4WqSIXBzodfe1/ULj44SoYW6/tF8ou0FS+RqhrhSAB0HOqvf8tyxs3ppoM7HJo/3mkKudZ+b8g1aI5cCaEOSge1DbIuSuP2kDMsqfBDXfPzcupJiMHbATRLVqbOXw2aEncQKorojcGXIFt1xm7oRXVQz3TRET6K9MMUaG158MzcnJyTUg5O68sJPbZN8QJ0daWO6//YJM5gSRNlg5wRHTgBOKbIIc+i3obEQOVSTu2f+bZo2y/Isa1xiS1k1D
AEMfYUVF5LC2tcLF0J1107YYJMoDcrqNfWbs04qx3CbAmLRihZCVkP4eLtyYGm7oO0N/mbT/9vOnCXK0zaTIYcsqRQ7dfbRIEbnrAy9w3z+TLzQ+Ra5SWrbapPK0rhQAXFvctob2WzkL6/mWb1mbT7Q8aNObfgFOa8MH3aVlFv5aHMu9/+dR5lLk1Lg3wYtIo/6qrVOHpJNgNhN191NlTWI53ZH3R40yawU5p+8Hojyr9k6lK3n0BtOhiiSWg1vyzbsv7efPUCOqAxZLABAbGwiGRORokc0jR3Vuihy10I2R22mKHNa2Tri4bp7TdaSoUXnsDq5sfXVsJUKufOufRz3dFDkhvdCKQmpuhdP3f2mIcDoD17cLOVqkiJxuk40NRNxC40uRE3VvBrmdFABcW9K2HXjg1dtq/baJ822C57cwemgyYoXmge45Myi2RGLlKiJQH0NvEuclGbrdHYP7Au4QHptoYcQ6jYxYcxo22aZopVYuC8Fp4liFkQdtXOy7sQdtBjlo6f8aXcbTxIIzI55JcKw1tMgWkCM6myAHdb+HfYsEOeJYoUOzkLVC3WHUlhMuBsKWku9qocoLI1ZIB8jdHBFULLVyQlZCerEVhdRnXlwKoeiLS4GcdiKHD4jI3fwDuZ9pS0iRM+reuPI0J1D2ARwrYsyi98T582G8it9Zuo+VKwgsrpo3pwG6B/TjfhxhRO7mk6u0l6dAc0KIXnLlfTLQ1Gw9ZPhx6GlSOYjlDvTZz3+zsJLO0uF5ucHpEDrovhloRO7OC/44JxxKHwlFx+umvdNwZJ+2YMoWktXgzTeOvDpXKxk+GJGrm2YFYKHERZfp6lgoS25pOeMCHwHDh4vPb9HRIptFTtA5rt/548S3DIktPTB0P9jIVblXNoSKyIEfDyq5PGWOunYSDDg+Mg4fQFs1vdgAeSP7ZFQeDx8+S+G+GV0GyKFOLJhiRE7MiqYnyElS335pcDoUbrmfbx9ytEhu+R9/jjHgu09maTkuVmgJKXK8qLuk8kJdKQB3x7j8lNIx5Jbg56q9e/e2wE+8mrNyS0yQW2Yhdy5E3VMwxeaNtRv9ReT4i6/K7f4b7h0dDKOt16pJa9BJCTx+2DBSPi5Wy20S5qWqlo3ETx9+nGDzp69eMyKHcpqBcqraYCGfkYaP3xwRitaU0ScOkBUqAtpmZ1PkeBzeC8URE7FxbknRT/PeVKMJnOFBWqHIZpETdC6YgmctoNXtlpE5g4tTQd9CI3L4gE8xmh+SjftqmnGS5CaMQenFyLDjKQZReUAO63YBxXKajwa84XvAiJwxK5KeICdJrVsMrobDY692IUeL5H981RqPu87MLi66uvGpMtoSJsiJujeuPNSVAsB985x1aMcmSW7Qr5ND62xaeMba8a/RbGEW8W81fDfLbYLim+qOJL7/5G6rAsGHge9xopm3hYx/u7byzSHH31hE3sbEax2asXJLOps4fvfJbm/gumkuxRydaepu5LIG5fU84nAMp78yb672ISDX/fIw1hQWLHta9LLdi5xuxRxtD0ROjE0eC+SYPIbCkGPCkGPCkGPChCHHhCHHhAlDjglDjgmTjiOn8TpI/uCOV8KPfS3OI3JhJ/jEbV0xAyrmilVoiyBdwtc3ns7UR02/hX4nKEG2GV/VOe6q9DgFH6sWwvHp13i+dLvSMRg9bUz2UXocbeD56ihXxwD0DDcbJV1QY8xcTHN1rdIxkjTO1bknDKg0VwVOg84Eg+IShbgw/0remIY77uJ0i55Szcmk7Y6KUipOwN+lbjto0gSnTIlagoj65viISui8UHHiGXQkty1Nl6PtOchpPoCGLZ9T0dKF5X+v0K3J7ALijLliFdoioEtT5KoDlRg5Liw4JSWl2Ei0e3pJMupKldN5OKHlq7wC0pJ8vNV8otOx4iS3SAMX5n4+N8QpD671I0nFzIU0qrnBKUkumIxyzx3QbVy4+wWcBs4U565b3yBVKMEdPQ8T02Q7ndIaGiHH
50K+KcfnwIVXXZQUuatzzyGFqVpCbqK+qrlHS3Jc8Aku3BGKE88guSRdWViwXV0bXti43Qq23zvyQ89BrnwO9LdqVovIJWwzqPy74tm9MVesQlsEdGmCnG6dfxJGTvOf50yOB0J/coGQIsO7gTIIlcyec023BvV0xoIKjEHVQmiIaNr3YuZCmmj0OwHsHxDth7q4HNFRtfAEOYM0Nyp0Ze4pkzSJC4ztJiJHrOF6LZ8x99g6Umy5ZyT6LnBBLeM9Q/TlE3CGmLBE93goTjyDqh1daAKXuup4ceOFqXC0IFD98JE7GuPieFSbMVNJxD4z0S/JTRmQDwbeTeFxis/Axl7zn5l89EEwRKdClHA5zyW7IXeF+xL90IXMBPeCTqPciFPZQ2tHHQw9BNesVa7/NQquww0NuYLnUQTkIxUUJxL9kt281dXbXZC30h8H75XfOAnSBXoYzgWrSe5IoX2V2Ri5KgHc6hBXyIMiBz8SqLPVF+Ouv0buNkikL0FfwQ3I6QJP8BQ5mrmQpgilSUS9nbCgjGSC0nyAHLwWsXfOmEbnedAgSdOQgH0zdsT5UO75EKVHuoS/qxewihjABtEIoLrQmgn6UobRD9XczARATjyDcWowhQtbvibIafYWPnzknI6VHJ+VRzTPRlYuURlQeGOdt7p87jFtzgU+A3uJbG911d9vGS9PdD9vSJ6bJyDHR/sVVsdUktMO56BNIotvrMNNiP/O2VcpHNJ4uedXr3M9qk1EXoVHuUIR+qR8okKio8euCzqv4OLSwPXaRI+0ZpJko64PV0Cu4LZUoGWpJ0aFIKdycMWhkG5NQGFRTD5oms9fhTpwYY6ujquFhcnRC2rAN+bzpeuoOcmArDWerjQuo5lL0+hzUCkqh9Wu84+Se6k6CkK2RPCa1WH2mcY04Y6rTNMkQhFcuF+h/jhYVAePtKIopzzRyAmGGC5TrHYNSNeKamm8IouP22eK+kJDnjKUep5DhjCSTxCMajSxoZdiwbz+4+OYYl639+NPvt2urvus8NI/Pv74B9Oj7V2R31WOVQMOQoLcLGKuVXPyJC7gIJ8IDSRcTm7F8B0CcrT1NF47UAuuhxtSLTqRaBJsCIdwUlQGKpXHuSYsqOSF+zURNTLqJPDyt9B6CLjUNAnShSfWBdwW/p3o1yAiV52SkpvkEGnIoCE7F6ZQ2qOgOyetODfEvcwYNVWHKJWudKCh8wKnpk9NgyjKr0HMXJJG44lMGBfmdKGEBnWJSqd0qHyUUunh4l0jplE5BBeSyJCmwbXBbaH54JzRiRudLEGuytMvrSTK/pxRLQNq0EhJlAf2UhFQyHPR4NsF5MgZbNAKwgq5S/u0l0KKuUsYOQ5FbqZHIeTrGch9cFCKHOpviFV0gYrgC1ohYC/TrTlnvLxqoYcriIgcn+HisauSOoVo73voMGVKh/+mvwRaURnILfE413JPx12FAnKo9GhHlLs7ZiZbQE5IAroI4VbG9P8LxJfi+IogR6K9BTUk6AFIPNIAEjoqrsIq8aULiVPXa6OxDYdIcL3gl7KFuCyDZkfT6ItC/NTkb3xT8np98lzEjL4+EUAR0lQkeNeIITJOgzXPViA1HQlyGhyuUUcqIIdjMnLrimoJBpDoi8YVub7eNRmIZ4qcUBMe6EIDiLrPfkWLYwuMyJkeNR1l9CjkVOjmydmsDNYKExmqBTVS5Mi9JcZyfHW8j1MeNX7tQA7nyuuT1jqckCCXQKN2/fEgV9fGyJFJFdzDibP+L3CH2IQS5OBialxVGItEOi4iCpZ6HqW3UobDCUqcWhrg08yNafDyM5Vg+VFBJPrCVStfiDQS0uDfCBUhDUFu1i1j+AbOwCAZSRCuyCCD3ChErZzNq1zxYJboi10IyjBM6ejoqFQ6npPW5MgP3JFPvv3226hft6ZJkWt0tAcjlzHnFu06YSIjOtJgvJwMq/BdSZHD5/Bp3ZptJNKlbRouONYGiWMV+IkWJgOgp4zIUaeoCwwo
1Da2cnRSBfcs5EnJMkWOC/duSCSdTJSg9SB3UikeGZIxIjY0EAoYx3EoHc1cLaQhpGbMukXuIrChOEPSClwgzlxQCENT9cEJMQ0uvJz6P6xO+exzeOjbIEVOhVqcWDmiVrbTMTU+RfXVeBLkMvUlINHrYQBjrAlyrCicQ5svN7JyJkd7TCyHkJt7NLUw2/5UqjrR/mhxjs96rSq2shpsPxo+gC2q+vs16eUZME7I2Z4OwXme/rhyG3c8XXt1LuDkV1gSNStTHD7o1kUaVLPh7+15xuGDhB+Sa84xdem6SKIC7cuAwpLjW+5BQdVRjZAjdpEPdzpVkoxMI2RbFAXeRV8PnrDewEUHp+VGOZ2D6AwNH07r1kCIlOyyQ6tadaw4yQf8XKlbQFpKSgqa+Q7DYZou0B0dKOQzAi7kJqGJLzFzIU0CmitzARyi3dNhfHQQqAhIg4gN40xCfCFN+UIUy4GyYho8fIh2P1+SHFoGkV5xUYjoR5DUc2t21BtACz+keKaoFkp2FcJGUd9op9MlaFxHQgcg3FgT9IVf2oIwGCXkGiBw0+3FyPFH9nH1pke5vT0HOQiDPdJ1gcqA/ET3Q3iGojrEBQWraJIknAweJJfzyW54ykMXpXSMjNpGZsy1Gq/gtWQKgM7JV3nCXXqVzKbTQyb8kFzpLApWgdgjKBs9D0h2cw34ZY4pcuFkIjE8kszWkGwhEowmUzx86Xb0KAG5oxAlqQYawULux31A40roUDLvj7J0wr2eSKaH1huqY1wVHse0xsyFNPgRBn2SAJechktK184kTyxULsTSNFFITIOrhCZJIG/VnK/c0GSJ0ciVzyaF40kdnCNVSxfiGHA0aodRX/1xNzKgpshJasLztWGVaGz6SYpWs/fj+J8DMXIF//gkpdHR8MqHjVxzD6AWNJ4ProLBQ+C59kwst/XJQ5tybawLe7rZ3EPAtj1XAGP3u0Cu/c8ymHS71Ma0ASYusfNmghlyTLpb2MtLTBhyTBhyTJgw5Jgw5JgwYcgxYcgxYdJx5PQ3buhZ4zHpRuRugLDGY8KQY/L4IHf73//+t/D3z9999ytrWCZdjBwQ98//ofIdCGtYJt2A3K9UGHJMugO5f/7zf8UP//Pdz6xhmXQxcvpDJawtmXTriLUF5LL6jl9mgfbw5A6Q7Sk2mb+prpsm86dbWETIfTaMxHt7oi00hJ3OL019Wj7jAt5yfPh2NR8hd15mPv6XDTgjJgy5+yAnk4+yAMLi+potmiCb2yAgd/tl+RsTBp+OMJdZjzKXb+HP9JW/MdKc7Cp382WZ9Z/7uNdOgl8Wcn8tXDRugrm52Xt9ZT1xqw4mPQy5J05zm8zH/jZJHlpU8HK/PAG5rL790vl6PsK8f75msQzOy9y1wBjayIxbYT62ki/VxvWBS+Is+uVFmKO9yeRbOJSYdRlD7j7I9csErsb+C2/MK3siXUDuzmsy+bhYA9BUA+ef/dcI+X4esalG+8/I0CaC5NPNEWb70UWA3E6enGfCkGsLci+YBSUnJydpBeQMBcssZGZ7KHKjCXIrBOTwro0YsOsMOYZch5D7bZpsrpa7koLwqgEH6s9XG6rmyfyRz9TMk70pOlbd3u33VpiPruSuFEsdK0OOIdcEufpWkas501dmPdK8f1kWDBMmmMv8rz857r2RZifR8GGkudlJcfiQ1dds/80RMG4wnwOY4uGDgSHHkGuKXOrB04ZWkUOTIGjjXd3ukXLnZTL/gmUj5cNjYTA6fHKf4bEGcZLk7hTbfGEPUHGShCHHkGuC3LZt2053JB8cyzFhyHUIuUg9Q45J9yFXsm1bKmtMJt05fGDChCHHhCHHhAlDjglDjglDjiHHhCHHhCHHhAlDjsmDSH1STHxuD0dOF2gvfEl5tmQfXRNJVJxoLcsEpWTjDl6Pvsc+4PR9vzo5wb7Jl6Pj78RPNxhVw985P/1WgmM7v0ddrEnOWldFZKNKcccvtKpX48JUDnS3HuNeOi2V
6i1s0pTQQotpPljQlseEXJh7XmufW5QS5/Hjx9vFP+rIXXXwrpTmp1TMX0X3JmgfcrpAxeog5axMk89BQaE14Yodhg4hp3Jx3JXW+CvBNZ6t5cY1KewhIKfxMm2cxp9btHHO4+1Axqc+2siVe5rcgdFKP+jj0n2V7UdO5bC+gU9Uij0uqladou6YlWtWcdXsVgFuUthDQA6U0Lb6uQVJwsTZjfft+ciRfWhQR4FvQ7vV6AL94l0UAZVoUxelx2ZoQO64m9LjmBZOJLn43cP7zJNNvQKd0iU9WL5wVp7gYN3Q/jd8gtNXbgqPPOjp9VrMAs7paANCrhRw1aF9bQpJuxaiftvBJxLfJiIHHZiIzKZmzYIK5HwD0oXikt3Ibi85dIOZZLJtDkUuXEk9c9XCyChlJN5BpzDRBVLg0wlO533w1jw0T3IVpkXIDyUJUhqRy5jpp0a76QQXqhzQZonR9pl0Dx+KHIdbBiN3dbsr2iwo22EXXHHMgPc93jUbkCMZ8Nn2p9Y5ZYrVxzWMhhpzgbNuBU6/ZuwC1MjTr1UtDA6ByjW01rkxFDlnQ09HDm3kG+NfBh0FkUzQIRf3CnCPjrvWKrZpyz3tdx1ygQZMUASnhdifw/HV+kRF8KHtZJNGtDkR3jCo8Z2eMXd1rI99JkR6iuDNSu8aATk44HEoxL8CkNNBk3NhUMAqwZnqb6xzyuOjiW+TIlc+2xtIctjGBdrvivd1olhnOwSkhUDpV2c77tqsXFBR5RkQuxa6jiJXHaYITknxQsgplY5HSUnVUYqAlDQSgyLN3Mt0NE9yFaaF5odc+yE3I3KlLgvKyj0dj8W7eP8aCHXTeHr/StpOqDtpGaSxLtAD8ljfkK2EPFxmXUP1jl2rXFBDM1BnQ8M5XROrXw7to/GCm6Fq4fp7GDnaBSJyCo9Yn9ajahG5Hm/lymeT7U2pO0IwQB3hrl9Qk4h6HxqwaqF3DYQU0BiKSC2+GYlUAZIpIUpx98BE5Xqt1NXswDc8yk5ATuPpVEZKORVmj/YhdMqTumV7tNN3qto4fMC4n+DCpuchm6JSQh+oHA7q9gUF7dEmQLerHHZwaBNoADVEZ5wAAA7hSURBVBXHj6Bpg1gTUjYgZ3/KIJSULbjuBGxXFCeEPPFVpDCaH5iyBl4lIvcLuh+wFUy0z0QRQIbiIG07ATnaMtSx6gJnVWSj3OEzrrcGmlTIIFvpni+pvm7NrArV3PkLarIVBzmMHO0CETnogRZjHzp6wMTZ2cb0eOTQDYUsPdQHbYiqRLcoeBGN54KacNSEEBKpHLCP8r6HI5oMMGwnG2jUpMVENY1ncrZDTttwzAZ9KCIHEZuW2Bgwlw3Y1qwWXXNRaohx4EGHD8RNZSh2gE1pIDu+KXagvde8GzKA/wQH6E28bzZY0NLNrjOVyIA0Rg7dE7QkI3KkbjuEPOlVCmN++IYTYznswunWbfbnqqB1wmbdom0n1J22DCq4OmaVKzh1XBx4TdVsaBkUywkZED2M1Yf7J2FB0vRr0dOvUeRwF1QYkVOjcLdVnxkPA9bxtr71PT+WQxMbs/KgjZGbS4uWIIfvd4TcbL/ikpKSYhpE52x2ISY/GxMSLuwID+5BiOUSFQFpycpWkFMcI44zebvSnmzzXKJFwb2wa6mJYwUL6/0zlJWoiERqaIXJA8AgskFE5KqLx4WfF7aEHC2pKXI0z2aRkw4fznuCSQq3P0U0iLY/vxCqQtpOvN1IyyC/AB63OFCCnIOfFiMnZED1EKuvcjgauL5q4dE13uoOI8en+jo7x3QRcZ09YoVGgjYuR42YIEEOOx/cC2jL0HqtOG5DNzke/oGlqvKadYsvIaOJcKVfPgrIy8CnkNCNIleO7nLRsRrQ8fLZZPcw2qnRqOkzwDPrU7VNkIOzQVAKdnV8vUEYLR9F9BFHCKqC50WOtSXkSEkSxwqagRkX8hSRE/PLVtIhD43lQDmI1tBEXz3Ky1Wqn2jhq3CznchWRCJU
ROTAR1bQWIVkIOpBq6/xmj/7BBc4HwbUHUcOWlb0GCX0r/rckp4Xy3nuSlmHrRyQdT7JxYhchWq2Y2w8jqeUEA27RBLkEgLS4omx4sIVq2PdoHUT6ZSrbp1S4bpqpuJgmP2pXF+JY9WtgSDdBR1Q+sWGOJWh4wmKHVxgcFoUcaYqF8XqzTPhcDPDBwQ36n8uDMoLEYxqNkTksWnFEO577IqBcB+UyA1pybHqaEkqB4+gdOLaUYDvXSPkKSIn5qfxAp0lwwcYvJzQecEIYS3cNuBS4a6jbScgR1smAYUiAcVREscKpUA6SCJkgE/ojNWHQBbtWI22mH0Q5Hi8JTuf6wujCDQnXB9i29/Wt6SHIYdn/U8bkE1KdoPx3GwjcvhAksMJnkvG29ES5HLEiQG00y2eC8kQAttq/PQhXVvqo/Q472VEDs3EeJxfiCZJcGnoOGCYiba5pUN/NN+A5l4SHU80tXL0UzUuj/reKi8UFoG3o5MaOrQpb1gLyOlpSWjjYTzaTlCsdkPTGEKeRuTESRK0mfF5T+MkSbmLexneKRmFXwA4fWJC1mUi5GjLIFMZ5eoY/K0ROb46ROlxLBAopRngE3pJ9bFNVbnAFQ+GHCZuvC2K6uJ5g3N/JHb1PQe5zhKu/Zv7PniZYdAh+mj7DpacYH/uwUqfdavHPmv1tbVDIwk7Q3x/IjGPHnIZWxoeAnJOu1LifTq6newDIocm0noqcSUYODBzqb79h2DknB895PQP4/vjqkNcxSmKbkfuAZN3LXLjjcj1f1SRY9KTpN6ZMlcSg63ckP4hDDkmXSrxttjIbQZ7h42cbQlDjkkXM2cH0IVAtJNrhwasnfLaJkOOSavhXGoStWyp8an3CbRLcktAcnNLDLlU6hlyTLpSYma6YVlbMpNKCkOOyQOI4X6n4918KHJuGDg3QM7AkGPShYFfM8gxK8ekC41gd1u5DqyCarvoYwpZnzLkTJHrwCqodojK4Rzr09+JY/UhyLl1uWPtyCqotkuiPUPu92Xl8B9dauXuswoKvZukVNpn0uVKeJmRR1mUqyJYDS7ZB6+n4tGbPX5J5EWg0u1kBRZe+KSOIosXmPwuJklm+uTSOZJ3utLK3W8VlCIyxUcR/CtdrlS1UBEc7+LofihEEclnOPgdilfStynRciSlX0O5l9OuQy5OmXTRWA5cl8KiuZ4uJalEcutTU1JTUlJTU0q60rG2ugpKF7igAq1GIn/PqsDvCYZDCtXs9Wi9JXpprAKfm36N162Zfi0BjUQylOuFhU/4ZbbkoKCgfNaxPUhyU02Yivchsrb1aKrzkWtuFVT09FN69CoiXa6EL0IYlc/21ngqiEvmhaXs4faZeNUAXPUrXfiEkYtGC5pYP/cc4Hzt7OxC6o2x3PY/EnnHIEg3Itd4FVS1r1LpdNQgLFeSInfPyykPLVUyfntCmBG5GrrwKZqx1vPcqLOds/MMCXMicjMfhpXjG62Cyp5zCq2hEpYrmSCH8dLTxVjoRX2vWbfwirAMsn4a5cyQ63kSYuc8A/7ZpTaxcg8DuaaroBSrY2PT1MJyJSlyDarZjrsOueFlp7pABfpqhm1aMnyYlSksfEpUBITmsV7uUeILyNkBcjE9Arkmq6DIKn3vGrpcyQQ5vA5qdT6xcva7XBzRxElpiKtjQLq48Em3Dn/RDBOGXBtF47W+gS+979dcteGbsJj0nFm4Ro6V71HIVXl6HEqJd1nfwJB7hIYPvsjG2YU8tBFr63J1ravSI/K+3yjIkOvB0mQjyqIQGLPGSLDq5nk5Jo8fc3xR82+a8/XxVEoYckw6LFfOHj6b3cZrc/+DSjxDjklH5eyXSM62Gzn2IjqTjrlU1ZdfHP7iiy++VPH69iHHrByTjhq5L8DIfQFmrp3IMSvH5EGRY46VSTchdxiQO9x+5B6qY730A+u5320sVw28QSjX/liu66wct+Iv+Osws57J468/uR/9WTvJ3cCtGKsWr9iCf9Oz+IIt
zWceMbote7dUbZhgbu2cdr/LuOVOBgbNA4sKWbkvrrSJuO6xctwKub+2W5ErmDJ887d7l/XLZ8h1j527dPZSdduI47vlO0m4FTYD93QncnXzbNFiCK7IwJDrHuaMPx9QOg05x41D8tuKXO0yC2ufSoScZsNIuV0hf/elr6aYD98DbFycIJ+xDJArQJcU81lP/Wtq/7IDr8rGxZqCk9Vrp5DxkQmycRd4bnHQRxZmq9T8GVCBP/NUHnjekfIZFzBy3wy+oPkIPhE3XDDV3Dq0QbgOlS13+W2ZBZQPmSyzGP/L7pHWkJGY8eqPLN4bim6ouNEVjy9zevRfj0LOqW7eX9RtQ45b8U5xVThGLmJ8yk9T5jTcHTM89ueN/dL568+vKrn88tiau6+tKqma567NGmrjG3N76K6iS/tMkYt4SnhlM2vQoZ/n9S/jlpsF5R54bqeInGaxbVrp3n0Iue8HnebjhqTp9v5AFFtVciXGiNwY27TaSTahhohn0rnlg4/pNtosVP/4/H5jxjbjPklZPKeBr5vmzwxmT0LOcPPlAC1GboTMEsRc1hJymsVztdSxou8Gjnu24u5rcPL2CzvJ9eBYsT3Jeqos67ktBoCo6euZm54tE/6EgOH6wNPc8jlaXrPY3SCgdH3oSepYfxx4DDIdK2y0efulnYgcETlUdgSUd/ul/dgkngFANfPcDWLGowvJ1VmD2GuiPQs5/szAk8TKhaLTP41p0bF+/9y4kGIxljvzbBnu9jsvbKmbBj2NkNskt7GxGfViHu7l2nlmzj+Qrw3fZC7rl25q5ZDcHHoS06Jb7CQid4ZcwS2XmTtB4psvD/clK3l1G/vM2KdtjFwlRh5lgspE7Eoz5u+O2cJterOBAdOzkNNtGvJfbRs+VG2dOiQdIVe198+jnpYi50+Ro71LDIvu0jILPCDmS3NycrWmsZzuyPujRpm1gpzT9wORvavaO9VsD0lyZeurYyvvi5w0Y4B99L9e2M946WHI8bXThrV1xHp3zE5Arm7azBSDxMrpFoN3hN6tieufJ0Gumci9bhoZseY0bLJN0UqtXNagzEaOlYt4ES/1R2fFADBduK5F5KQZg95D33uMBw89FjnwXv0aIWeblJycXNwoltt6yPDj0NOA3J0XdvJVG43I8WcGH9NffHVsTe2kmamlRzzVGLmCwOKqeXMaOTVwlGhebnD6Yiet7puBRuTuvOCvvTylHxk+HAlFx+umvdNwZJ+2YApWofazFO6b0WXCdS0hp5NmzMMRGRs8iIPXnoMcDpBMkJMhgSGAFDndgQmy4bFaQI77ZqTNjP/+gxE5zUcjrd0O4EmSp+UzLhArV7DMQu7c5CtJqpaNxE8ffpxg86evXjMix198VQ55okkSC/mMNHz85ojQS1Mt0MwHgmfDSPm4C7xwXUvImWSMqjb4MRw86Huulbu/CMj9XuWxHDwAcfrqJoeqqymL9SUGhlzXyd3HcvBQevbw4bMqE+LQ09bD6PF+fYyvs288Q67L5DF88qDnSw9jMb4/ouevfI5Fxdf72jk7G1dO90DkmPwOkTtLkDtsDOn0X36OFkF8flgfj4gD5nIZckw6TaopcaKZA7f6OWLu88+/LF07gyAXz5Bj0mlGToKcIBg5IO5LlS9FLoYhx6TzoBMcq3HUWk0d65f6EIpcKkOOSeeZuSsYuC/OSsM7hNznn1/ic53x8MHXwJBj0glioIsWshFyZyWPG/T6s2jAihYXoi9uneHb/h1aGXJMWrNzepWq0Vwwr8rOLsV/1KcmNT9crS+iwjbHZNJGKYkJ2UxiNNXZs9n6Rhjy93sQVuRMZEYqQ45JmyTXzhZkM3jRw8iLHjZdZqPXi47WwJBj0hlSj3Y3Hz/eNp4/Sx41HG7n432GHJP2STwizna8rbP+y8+Fx1t6hhyTrpMQbOTG29qVUuI+z25fBgw5Ju2TGIqcczWzcky6Z7w6HjP3HzFo3hcR90U739RkyDFpdzCHxLee
rz7cESPHF82gwpBj0tZZkhD68iV6ynq2ur3vo9fTXTKT2NdTM2mzkBk3ilrnroBgyDG5D3T6Tl5zw5Bj0s3CkGPy+0BOf+OGnjUek25EjgkThhwThhwTJgw5Jgw5Jgw5JkwYckwYckyYMOSYMOSYMGHIMWHIMWHIMWHCkGPCkGPChCHH5Hcl/w8BjRR6N/l6dAAAAABJRU5ErkJggg==)" ], "metadata": { "id": "e1rB3l8pTRtN" } }, { "cell_type": "markdown", "source": [ "# Search-Summarize-Notify\n", "\n", "The next section builds on the Hacker News example. Instead of just sending a notification on a match, this workflow will summarize the match first.\n", "\n", "There are a number of alternative combinations. For example, the summaries could be built at index time. But this example will do everything on the fly when searching." ], "metadata": { "id": "Nsr7vVKFkA11" } }, { "cell_type": "code", "source": [ "from txtai.app import Application\n", "\n", "workflow = \"\"\"\n", "writable: true\n", "\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", " content: true\n", "\n", "summary:\n", " path: sshleifer/distilbart-cnn-12-6\n", "\n", "tabular:\n", " idcolumn: url\n", " textcolumns:\n", " - title\n", "\n", "textractor:\n", " join: true\n", " minlength: 100\n", " paragraphs: true\n", "\n", "__main__.Slack:\n", "\n", "workflow:\n", " index:\n", " schedule:\n", " cron: \"* * * * * 0/5\"\n", " elements:\n", " - front_page\n", " iterations: 1\n", " tasks:\n", " - batch: false\n", " extract:\n", " - hits\n", " method: get\n", " params:\n", " tags: null\n", " task: service\n", " url: https://hn.algolia.com/api/v1/search?hitsPerPage=50\n", " - action: tabular\n", " - action: upsert\n", " alert:\n", " schedule:\n", " cron: 0/1 * * * *\n", " elements:\n", " - select id url, id title from txtai where similar('software development library') and score >= 0.4 and id like 'http%'\n", " iterations: 1\n", " tasks:\n", " - action: search\n", " - action: tabular\n", " - action: textractor\n", " - action: summary\n", " - action: __main__.Slack\n", " unpack: false\n", "\"\"\"\n", "\n", "app = Application(workflow)\n", "app.wait()" ], 
"metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SxljaDiPkvv3", "outputId": "770ef0cf-f9e3-4e6e-ae18-084422731efb" }, "execution_count": 7, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "2022-02-10 17:19:48,847 [INFO] schedule: 'index' scheduler started with schedule * * * * * 0/5\n", "2022-02-10 17:19:48,857 [INFO] schedule: 'index' next run scheduled for 2022-02-10T17:19:50+00:00\n", "2022-02-10 17:19:48,848 [INFO] schedule: 'alert' scheduler started with schedule 0/1 * * * *\n", "2022-02-10 17:19:48,864 [INFO] schedule: 'alert' next run scheduled for 2022-02-10T17:20:00+00:00\n", "2022-02-10 17:19:50,368 [INFO] schedule: 'index' max iterations (1) reached\n", "2022-02-10 17:20:02,233 [INFO] __call__: Sending alert: ('https://datastation.multiprocess.io/blog/2022-02-08-the-world-of-postgresql-wire-compatibility.html', 'Every server-client database has a wire protocol. A wire protocol is the format for interactions between a database server and its clients. It does NOT encompass the actual query language itself, let alone database semantics. Proprietary databases like Oracle and IBM Db2 find value in developing their own drivers.', None)\n", "2022-02-10 17:20:02,496 [INFO] schedule: 'alert' max iterations (1) reached\n" ] } ] }, { "cell_type": "markdown", "source": [ "And the result in Slack. See how the text is now the article summary vs. 
the title.\n", "\n", "![](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAmYAAACvCAMAAABgiuLtAAAC/VBMVEUzGAwfHSE1FiQhHRwaHT8fHzMhICosHiUtIyE7HyEjJjE0JCAjKFoxJFoqKi0uKikkLD09JEArLCo7JE9RIiElKmY/JUlPIy1JJDgsLE4jLHJLJENFKSUrMTZQJEo4MDdXJjxRJ1RCMCYmN1InOzAzNGtHLm0qO047ODRtKiVeLyEoOH9rKzBhMC9BPSg9PUFVOCh2MCQpQnJrMFUhRH4lQop5MiAzR2UeUzt0PSiDOyIbVW5ASmlkRC9FS1NgRjdVTCpXTEV8RiQnV6KORSc4V46RRSEVZIpaWCxDWnx0UTohYq1AXYgVcEYgZaWYTCCIUTg3ZJkUb7EdbMAlbbigVilAabY5bLFTa16jWSMeeawzc6hSarBpaW8aes1ua2mdYTRUcJB3bi81d8N7Zo+JZmKNZ0knfr8pfchhb6lLdquvYx+uYymGaXuGbltYeKRQfoBtc6QthtR2dKCEeywVkdtSgri8cC4Bp1x6fKxOico2k9GBhmBkiLVIjtVXjLiUiS+0fUxrjo8en+Wvf1yHhqusgGtDmtGqhILKgTukinBnls19k7JUnOKIkp+djqdtm8mxkWKoj6DXkEZBsetdrPF2tEbImGh3qdZnrum9n2W1qC+sobi3n6vgmlDHn4MA33LWn2KCsM6ir4ODtLqUrdF4t89RwvZuuvjEqYduvefVp3W3rqGpsL51vfOXuabrplvMq67Pr37AsbyNvubpr2yDyPngu4tx1PzZxjeF0Pr2uWywzJrWvrvhu7i8xcybzfDpv4Sxy+GV1Oak0Ojcyn+t0PP5xH2V2fvKzcrgyqjUzNGS3vqH4v6u2dvszJ3zysHc1LD60ITU18f50Y/G2uDr08aX7f+n6/7G4/b93ZS+6fz73az537bh4+H63c+27/7a5+jp5dj95KL15MXW6vS09v/g6+X559ne8rzq8rS9+v3I+v7/86/S+/3/8svY+/D/9r799sff+///9tn7+9bp/vbp/v3/+uX0/ezx/f7//Oz6/v/+/vX///+Yv8OOAAAAAWJLR0QAiAUdSAAAAAlwSFlzAAALEwAACxMBAJqcGAAAAAd0SU1FB+YCChEiHy7qY6kAACAASURBVHja7Z0JXFXluv/dGqhpoKippadBy7nUm+ZwKo9oViSlolzngVI0Oh5NsyKno2aUpDlcJZVOljldPZhDXvg7+08c6nAQDUEBRbh0VBQ3tLd7WJ/7PO+03rX3AjcCndT36ZObvYZ3/K53Wu9vP9U0Zcqq3KqpIlCmMFOmMFOmTGGmTGGmTGGmTJnCTJnCTJkyhZkyhZkyhZkyZQozZXcAZsdGNw8QFhQQGBQYGBQUFBiAH/j/6GO3HYUry9xcqvTvNcyOAUyAVRAQBmwFUaOsBdQMRNweuW3OskozVfr3GmajA4L8/fz8/QP8/f38gwJoa4a8Va9es2XNmsjfaH5LUVe/bfDhnmGJ8GiQbrze9IDmXNnRP9alMFPmjRlQ1mv16mnT5syZs9rfP4g1bIFB1bu9917/91pWx791zCx1kz0xO/fmpy7taG1LrLarjv/zu1VrpswUM7/V414KfKlXr6Bxq+/DIVlAIFIGkLUE1PpXDzJgZnkm0wOzhBrwpXj+2Ez3jBohDq9O89L4yaezLs7/0/MjF13Ygv+eVpjdg5gFBfpNA8rmjHvqpcDV/jWZAWAtu9XsFgQfNYMCZcws4VaCmfv7V2rVe/lI8Zg6FmL3HR6KH/9Z575kreRFC+k8EbPvR765DjCbmXX87XVbppw+/uclMmbOL6cUapr9y9EjF1nxO/sLP9aTAwdHjhz92SHXubcPkMtXzrSqmrsjMQvym/NUrznjWk97qtd/vSfsb/37t+zWrX+3/ticSWOz/6wNwy/ATNtV2/J8xxp1D6/s
aGkxevSfa9+X+H13S4uR3/SwRLpSazfKYa3ZxfmrVs68gJhdenMVYJY1f+YFHbPij/6KmG1Z4rDPX4UH2F/48eUqJPXgTId27u3Mc29ORr7+MVphdqdi5j/tpWnj/Kf1eqnXf3Vj1rJl/5qEsm793zNitnlGjfs2A2YlPSwRjmt9LJE3o7AHzWh4XyI0ctBpxtdodyXGEulgmB1/46fjbx9BzDbR1uxNuTW7dOjcO4DZmTwA7FP3yiUu+teNtzNhzIdnCGbFH+0+984moK7447+/rzC7Y1uzceP8xr007qWX5jSrjlYT/usfBC1Zy24tu3lgtrH4NUvdXpaI3Ib+GzV3TI3g6x6YFXXy+6YHdJwMs5UjR48evYSMzdabjc0oTNCuffyDduks++vcG1dg8vpGJsXM/Y83Ms+98wugd3DJOYXZHYvZtGn+L40b12ua/2o/tm7Wsma3v7XExozOAWTMtHPdYQgWkdsWMZvhjRmg17xOiI1h9k8YmGV9985paM3AsNP0mGkyzOwf7WSTCvyLYZZDx2YjPzsLl1kPzrwGnafC7I6daU6b1nrcuKem+c9ZfV9rXDfDlTOcacIc4L33jDNNwEw72AwwK+mjd5phMmZablsLWV0jmAFhhLVbYFb8MaeM/FVi7DTJZdbij0avcinM7ljMAvzn/M/q1avnrP6f1rg+S98C1KyJy2Z/e696gBdm7via+hQgGVqvei9nSpg5Z9ToUujzuhmB6dLHB5Ay6DTpX27jFIBipp1fYNUUZndqpxkQGOB/f7Vnh9UMwrcA9E0T4QyG//2r4zuoACNmmn1uDVzQeA4XNDTt/Os1W62SMLMPtcRqPmJW8vHokaPXl3wEA7iRM39ducRO/7JJCxo6ZuRTYXbHtmZBgc2r9RxWjbzT5K0Z/AcTgepB5E1nucLlqxmaenWuTMYsgGEWiDs0cFsGQY++Q8d3AkHlCRYas1kKI2WenSbu9wlgmDG4aGtGX6EDdwGqsJRVeEEDQILWrB9gFsQxI6QhaAEENFVYyiraabLWrB/pNNmOs0CyrkF3a8CnKixlFe80Aa5q/bxbs8BAOkpTmCmrBMxwCOaNmWyqsJRVzhRAYabsN2vN6H8KM2WVjhmuvwJmPas1DwhUrZmyKmvNAmrVab5wWPNqddgbAIWZssrHLKBWUL+Fw4YBaHUCAk0547e4Z5D92A/lV0mCMh5OtI8JLvQ6XrzGpn+xD9VlCPKJ1AeTbxH8ra/wsLQm23y91L49s/STCQ9d1r+UvChexUlW1P0DV2VEBUVoEhwpp/h2V7yvP/dKTR8i1qP07XpTzIICa/Vr1SoosNqwaMJZGa2Ze8YzPx07dmy/o8owiwr2fjWe2q7QHDP5xL8Xs6v/cdJHzI5WD4ytEGZlRmWOGSknM8yKXxuQ5ypHlD5eb4YZLs327NmqZ61WrYb1LGW2qWPWpbDqmldWRt7V1KUUzOQTVYBZeZL+mI+YuWcEzw22VllU5kVIyskMs6JOK8oVpY/Xl4JZv54BPWv3hH+iqwWZTjU9MUvAht8eFeE4P75Ovcl5WuqT/++5Rl80SsfNZkOwqbuGxzO14vmP3/9yJj9tcg/rP75/rlarmVYoI/eMEJsmLvj6OcuzB9wxtSwWDBruXvl4vbG9I1wQrP+rmezEpfGP15tkBYi+eaXWswc0jX03poBi9nfDFXY8dYSc+scrtVoswXS7Y4CBktfCHNqNTnEZTRK1+JAvHg+2Qfroec0dNXs+jY8kH5MKwe+qY7H4b8Rg6qEkBtP38gHy8ccDRsyudog7+iBFgRRiDFS+c0ZYSY9YGpWVZR4tPvS7jpBPFhULjUXFIkYFtuXZVS7nlo6WFqscHLMbPWJdInusnOLbff0chqbpMeS2hRFQJE92UftvXvfbFt/382Ytdh4kEbu3vAAVwKM0v74cmA1r3jOgVb9WAf2icTOQD5gVtYmDSB/dXdT93bxLYyIcqU2eHrvolzbQG+Q2prL0
vnmX1mbaB/c9a5/bpZCdNrmHPtdpjWdfOL6YYyYuqN350KkxEOEy3mjFN133v981i9DiOx/6Z2/gEU/YBw84/X3DWI1e3SiHffdIAcGMhcfvSKh75OL2H0h22s++sJJWftID6VpGw0b5WuoD6QSz5i0+W59Wf5XjHx3iSAm8df/s0981m+Wieeo+4PSZMc9kakcbnGTZ6B1us0d1PnLxy/XkY2WDRANm8Y1ySvpEkvb4avuNWkkPv0QszRuIGUZ1g2WeXGsZcPp472Arieo6D41EdYNHjEWyaYkNPrK+axbr0jGTskcLMN4fQuse4SiSYihqs0JPdlGnR8Z+djbe71PHrkdezTzfO9KV+uC6U2Pa5dAoS7m+PJj1q9O8X6vmPckujVtPAR7Kh6bfqsV3KUzAhjj1yfzU2rEueGogN/GkSu1DwzEbadhNFbXfTU+b3cM7Eitr8REzcUF9KLKjD5wUmN3oAfVDOk24MR7qjp6AoKNCHOTq3PZx7PtNYwoIZjw8dscywwCgqGssJW6jFt+z+zZtWbCVYHZfsuaMgtZNiw+2EcyI3AEeHUw+aZDwmcOKoNk42iA5g47pMprsxpIIc0iYlbwIQS17KId2/1CRT4yb5Tr6QA7BDHU6PPMEM7wu6YGTJCoRGqlzHvGNHmTTFf1Y1u6KhJmUvWWi01ymlz/HRiS7qGsImylcxQaD3Z/2cKIRM4/rfcesTqth9/drVatn8w2tqvEfBCp1CnD6xIkTDi21wcmSPrFazP1PP/106ydy6LAnrUliyYuUnV3Nnl10Vkvyx9OPbKSnze7JbWOxhJX0merSMfvVcEFqg2RRYLmAgBibJcF0V5RkTLCNXM1KHb8bUyDGZhgeuyK3bYuxh/QZYB9yqzMqwj44Liai5DVoGhAzKPWSF0mKyLPgfivMhd1gPoZGuljoYyNdWBE0G9BGJdGxAP0AIiXMUpHAjMZxtGXrUrgsIqnL9ZgQG8EMonLzzJPzCATgRRIuQiNE84hp58H6EGiJpU5Typ6OWXy7QjkGxEYku6hTLIuUNFqscKH/MWLmcX05FjSqBcyb13NYq+WlNWbeUwCoFOhVsLKk0XXJ4Ii0Bun0ykufPFf3ZBJb+WCnTe5xArR5npjJFxgwa8gwK/7yhdaPc8wuffKn1nUYZtAkudh3YwoM4fEr7JteYWOL43/+U+talNCEdr88ln603b/+kKzpmOn7NE0xizBgtqJ0zNwxloCAgMAarPX+w099tl39j5/g2eOYidIRmKU9XAZmEWVgJmXPiJkUg4zNCm/MnJv+3Lq1XymYrSgnZsBZnWo9hw2DeSbZ/xPoy0wz/pm50GQmsDE8m8QlNBqn/4hGUdcVrPfgp03vIcNqY6cpXyBjRvJVPDiiZPCAsy7emuV2mJknWrOMxtv4d2MK5PCkK6A7RICOPrguj7OU++iHwbai9h9C0Bwz3qkLzPAACY31XbHGTjOt1E6zqNMHuEf9q7rJtAf9y5P5zqi/wDxOYJYgraqxA+k0a2adZuyNrqV1mi49ewbMrsgxGDtBT8yux3Q+5PBqzTyu9xWz0eQ3zKpVq1anWlApq7OjvdfNcjsEbMPsDDh9cdNEK0OmqBN7fOxfHnIcbJJIBsGb3mB9qvk9tCuBQeXbmQwzwwWIWXzdwztIhzWj8xHnymYROBgqnguY4Yn/D0VwvjdgVnvS6X/CAD+Vfr9pTIGMGbvC9v16x/nesaz3ch9sSDGzDw6IhZgCproEZjjoPX3mo09dBLOm6y5uaRJHQyuCkfjFMe0yYbywau/ZNMxG7xCrfShE+vmndArwIJkCFHUndyfUZeVE41oWANPyhECoNoEZzzxJlt+SC8fbwlwIoxKhkahExMtg7L9pipVMAToYpgB69kg52Thmcgx0SM+S7Y0ZjEmdX9XfTaMs5XpfMTtGXp3Tn2os5Z3mMe+3AM4osgwDk+P7YerOkCGTADQ+v8ZJ+LOrHOy06T3kvu+fs7RaIi9o
iAsQs2uv3/8qWZO4Nr5WiyUxEa6vHn/65a9hEIsnrs995PmxXwBmT37dES8rpt9vGlMgY8ausPHlBwj39UeenzmX9YzxdaHfT4LC1TEjU/ixmbQ1e3U8WdygySdJhRPFc2vCzB+1XrhUUDy/Dq4lkLh3uhCzc20w9OLB4TZaTnQNK7U6Tr7bwimBGc88TcmHj9ebbGVR8dBoVDxi+8qOeBTXNVotkRc0ND17tAA5ZnIMBBuebO9O8+ALTz//TffdLErz633FTDs2ku5gJHv/TVoz338UtGTwXS82oWOz38ZMXw/dqe80K9FS+QRAYaYwqzLM2BsAhZnCrEpbM2XKFGbKFGbKFGbKlCnMlCnMlCnMlClTmCm7JzEzSBnOr/dlofJo/d1lnfZagvQtVHO7fdVThe0qedunEWWWeRZ80m1RS6ib7iFgWlamBoPtQ3FH4bZc55cv1PL/43qHlsT9glR+5jGHPr7SrDBmyyJuAQQR1JQXs1uG6iNm5VM9VRpmuKfJPAs+6bZ0zIwCpltg1hBf91PMisf4jd363+Nr97UWHDv237377q8KHRrmsAoxk80941ZAJPggg/LE7Nah+ohZ+VRPlYVZGVnwSbelY2YUMN2qNXsK+UXM3Mua7sRDac1wF0PJa1XyaozksAoxI/tV+n6OuqKSPjUsli5WoqzZ6aIqF6o4YsqeIyioAat7UuwLYlolsrGm1RIbxYzpbagEiYb6haT54UIoD70TzS5V3lDEKqR6YjIglht31Oy5dWaTrYJpDU7KB8kOFBYKKwiy76feXxuuoBUQwgpGz6Ivui2WIh0zoSbC2/1fHg+YGcRMO11SIWb84e9tP3AQzIo6Uf8y7phG+QbMPIuPqa+kqulY79PzpJigMuHiJQ4PCZiIj+bwl06zx9dpgb92ruu+KhMzppChTUf8E0fcW3An5SNjP8tkiiOm7KGPIO685Lobri260ePdC8fpkISE1jvYyiVIJFRZ88OFOF56J9JKUeUNCahCqicmA2K5cb/V/I+fHcIQ3DHBNvkgKoV4qKIgMLtza3LMbCQLUhZ90G2xFMmtmdixmlYbbu8OhSbETDQ9UgwZf0jGsQlillqdbRdOQoGNjplntLyORNU0HpCX1viPB+xRXQqLujZdd2pl/d1GCZgUH8lhUdcW607NrZusSbqvysSMKWRoXJ3icLviLKZyoYojvjOaYya2EHNt0dX2cS6500x64CSXIJFQZc0PF+J46Z2YpfGNfBVSPdGreW7cb0FtahmP7iacGw4So6HygsjAjfis0xSY6Vn0RbfFlGBmmLlndLFSRRITM/H0SIUImLljnshEzJKYCFQjvOmYeUbL6kivGigM+9AQBwoKSHdY8iIpS10CJsXHqj6WZFvWfVU2ZvAPiSujYXPUyczSO+qkh/KZ5EJgJgQRfP+qM6bmy+sdOmYZTXZzCRJrI4Xmh+uc8r30Tsxwr7pWYdUTlQHx3NDdPhCaI6ldjvEgN2zlWEGgtNMLMz2LPum2aIrMMCt5EaePuDGfi5lYem7qhQiYacWDg22lt2Zuz2hZHRmqhkgHIF5SmfB02FhGaeFIlaZjVtRmhaz7qirMchvTeSTFjCmObomZph3/pCPFgKl3ErkEiZIiaX64EMdL7wTGlTdahVVPVAbEc8OISmp0GZpVj4OaCFVg1sgEMz2Lvum2SIp8wSyCp0cqRMRMy+0QTcZmEaZjM89ob43ZWyE2DwmYiM8Ds7K3TFcGZryJoO0sUxylldppSoikPnhSYAb9Ae9oaR1Jmh8uxPHSO2FRMuWNVmHVk0ZkQCUsN4yoojZ/fTRRZFFgxkPhBUH6oavtPTHjWfRZt9V1hRlmzigUi+mdZixPj1SIBDNtV/2nYKYZ05T0mmkNPzDMND2jTTPpNA2YlfSYWuQlAWPxGTCTdV9VgBmMVU5t1RKarrtw/M2dLE6qOGLKHi2+bs4Jm5gCdIBxJsvjtaVn3V/RoTtX73AJEgnVZdD8
UCGOl96J5J0qb7QKq56YDIjlhhHljgnCcZHxIJE1k1B5QZT0gfG7NAUgWbihZ9EH3davNEVM/0QwE2qiXU13ur5vCwNzLmZi6ZEKkWLmjLGIdbNPmvXFutcx84yW1ZFLrhqBWdfOhy7C4N4oAZPiIznkmMm6ryrATEtrW+9dK06IYbZO21muOGLKnmuv+/fNRMy47kZoi1Ckw9Q7fT8n6h0hQSKhSpofIcTx1DuBceUNWsVUT0wGxHLDiUptjEMBj4M8FIGZdu4Vy7Pf9BGYkSzc1LPog26rkKaI6p8oZkJNhCs1k7+gCxpk/YWlRypEipl2jfw4h528BVhEx4WGBQ1DtKyO5KrRW7Oxz1me3akZJWBSfCSHv3DMJN2XeqepzPd3PT4uvKpX58oUZsoUZsqUVaopzJQpzJQpzJQpU5gpU5gpU5gpU6YwU3YvYnb8B+9jpe15L9UMvmbKbUQF450O94wQB77n5LsH8G1sack2nCvNfiPVFMRxrkOs+WvojIYWC38JW2ZxlFU9BpMUDAY9lq8/eYWlnNZkW5m/xlVxzOxDV/wuMDNJB8EspvMRlwlKnpeXG7MqU00VoZypDMyWHDv1ec1Yl4+YmVZPqZjJeqzfF2YlPf79mJWSDiwAZ5RecjJKnpeXG7MqU03lluWPSctovBEz5vOP65lWT2mYGfRY5cFMu8VvC94OZty5ERHr5LatYbFEoj6EiESYDoZjRjawzI3QluGerRj4x+jASffvhJixcIVYCL5bLJbYo7j/GTdBs/MQBET50GUu7qGMZJB0aFvoMaqz0SVGEkr0FL0cb2ZKLHJOckRUy1OvUx7VlFEjRerCmLCo2eNrdf4JQnzXyqVZPNJdTMC7gquhJLdSHDP0SaB7d4LyFF6kyNFf8Kfa5XzepFEwsRY2dWl02xDXY9F063osUg9cb8a+G6VgeqoQs4wmiQSzXU0PmGqcbkcOzJwbMbEOeRg4ZlwHwzFDqdGWZjpmHg6chH8n8nvmLFzhV6jNp45zbSflcczE+WcynXPrHnExcQ/jx0XSkfHwqv89vt7FdDa0NQsztGZcgiOeYaaCIue4IyIzvU45VFMeGinavBsS9lbTnc6o5hPz0hrHcWmWiJRsmb1KtwpiAUt+l0RrFtPuCvPuRJVO3IsU9flUAsVhyCeLgom12uUw1xUa12PxdNMdsXI99A62su8eUjA9VRJmuxALM43TbXaaRV1j+b5cA2Zc8CN+M3wqkRoJzIwOnHT/TrzTRKdJQiz0WDqEhcom1prxeKERSnsyn4t7XDJmVO3EdTa/emPm5hIcvaugKig8Z/Cp4KHXKY9qymHUSBHnFsaE4ZOPTuiKB1MPYWkPJ/JIBWa8gI0qYMTMuaXZB8y7E1M6CS9S5KhpPiEKqsvKbRhHk0b2rLMKpOkWgwJRD0lUPgbfjS64EvVU6ZgdrL9TM9c43SZmJX1mcRWFB2YaUTYwzLjUSGBmdOAk+XdimEG4YmduUSdozTps02TM8HwCtGYQFBcbGTC7NsZv7A8OrrOxeWMmJDjywBdVUHjO4IjIQ69TLtWUUSOFR40JI5jh9mceIDrl4xtROWa8gI1upXCmafGDvpZ6d2JKp5vMvc9l5r7CJJ8QBR0+OaOCbUnUXZOsx8J0C8xEPRC/KvS7UQqmp0pgZqkBhJlrnG4HM+rcyBwzpoPhmDX0xMzowEn374SY0XD1DeBJj7TGrcMCM+ZU6Vqf5o/A4EAX90iYac7j42t/wHU2bjPMmFZDFD9TQUmYrTDT65RPNWXQSBEzJkzGjEmzSsVM8rvEZpoXHGyELtw2mWAm55NFwUbpRxscZvMiocdi6aaYyfWACljuzMooBROpkluz3Zq5xuk2MGPOjbhYh+SjiLHGBT/cD3InaKugFGidYBNkdOCk+3eCEmLhCsxKelDpKVYHYsbPJzxjpczMcnljRsr/OutuTDATEhx+OVdBeXWaHnqd8qqmJI2UPnUTCZMxY9IsL8zc
+m+6SJmgUwBNk7w7FbWJ5V6kbBwzQz65+othVtLnL4/SoTPXY/F0E8wM9QARSM6sjFIwliods5vLnsg01zjdBmbcuRET6xR1nXTqEAwM8y6N8d/IdTAMM/eMputOza8T4Tpaf6f7YEeYAhgdOAn/TuRBpOHq4rwoS0AAKlnafOA437tROo83ra0loPnLB7i4h2NG0nFuQV7xmBAb09kIzNLeSBZTACbBIZdruhKLnJMdEXnodcqjmrIZNVIYtUfCJMxuMmmWHmmTbWfykA5awDk0MPuYcKs3ZlzpJLxIccxchnyyKPiaQzzzfqdxPRZPN1EriXpgejP23cMFl+4NSpoCFA8OsZpqnG4DM+7ciIl13F81a7XKRYQ9PTZyHQyfaaLUaN1g6BioMsfq4cBJ+HdCzFi4ArO0h49kXfwCxgYHO/q//PVj6ez8zbnhF7KOjwnmeiqOGUnHpfF17mdz8HpjMwVmR2uvEAsaTIJDk60rsfhih3BE5KnXKYdqymbUSJHldWPCJMy4NEtEWjy3VotE0giRAmaBlQwONsGMK52EFymBmSGfLAqOGU4CqDE9Fk83USvd4PXA9Gas3N1GKZjuDUpe0MjtEGuqcfoN3mnKa5qeM4my/Dstw/60qL2nI+2rpMGPL5dLeudHJ+/ud4bl87vCJgB316vzMjAr079TUtNvHQVzvX7Tq6TPpDz38dJ/fMbM0j51KMz0ComKcN1LmJXt38m55QVThen58Y/f6t3xPWflwqzcbwLvCMyUKVOYKVOYKVOYKVOmMFOmMFOmMFOmTGGm7N7ATHgEMjVfVv/4/gj8ScKEGhbL/a2WWDX3DP4Dv2kNI9nmawuem5kn/xDSedlLw/ev1KqbXkrgviUr/gnvNxFlvLgw0wyYumFCQ/WT18qpWYRlm9jaVv5b72DMdI9AlYNZ3cPHTn1eO9blnmEJI+jaoywcswHHjh37pOMzmTpm57q/un9Td/ZKM63x7DyX9m/HLKqUN6yoflKY3R5mRo9AlYAZblt0Qk25Z7SkfhNS6/fimJHPc91DbAKzeBSusb3D1KFCxTAzs3JiVqqh+ql8b7XLxuxe6jRlj0BEoZTD9DzQndWC7gzrU9L0eAmWJO9GOmbutxCz4aRZKB787lADZlp83ZNFnWbPr4ObaQugB+W/ZB5Tg4iAYs0Cv8HdLTHpEiZLCKy4DyiNiIR0JZOmK5jYNXSXcnyXQqbo8VBAEb9GxD8ODwNug1Th7hKqfsp/awBVMPFIIULd9RNzl0R+cT4J1SkJ7a5HQU7JDWRUQJ01UX9YCQ9dJrsGcSs/ExARKZNti17cdwtmsg8NolC6znQxKLO5+PEPWJ+SpsdTsCR7NxKYuQ822QZjs7DcR7dhU5UfZcQsrXEcCn8urmQOE8U+NdxeZfAhJQUu3C1RKZKMGXdexDHjoiISJFUwudg1N4fiBvc+s7iix6iAon6NCGYsDHtUKNzW96xohtxv+c288F3tOOExCTHjrp+4u6SS1yJhzBAQ63IOjbjJbyA5Z86aiD+ss3ArecCSGuVwARGRMuVS3dRdhZnsEYgolLguhnk3yECVjq7pIajJgiXJuxHFzBIQUMtvtgMxuwkMXOsz66YHZrkNY6k7hNdoZ3a0/k7NiJlJ4MzdknAiJWHGnRdxzLioSNMVTMLBEeYqtYHwJ+W1mRv9Gs1gnTqEcaPrRn03HMUMWjZDgIgZc/3EPT8ULsMG68Ng69X22/gNdM8uc9ZE/WHBrUC8i+25JQIiImWS3Ffdna2ZkFvHBP8aIzx5yJoeNFmwZFDTsClA1sXj3cOh0wxz5XaIS3gix+nZmjWMk30IQePmMsHMI3Dmbokm9aF8CbPr3HmRjFkRvUsomPg1KE+LCbFxRY+XAgrS9CvHrAhFd+HW4jE02QwzGJvhHl4eIMGMO0sRfmwaJB/t8q/H0pPYSbbPnnvRKaRD0wSqQch9NFEIiEjOqW7qbhub6R6BKGZUFyNhZtD0eAiW
ZO9G+tgMByaImbasRUfoOqJMxmbUhxBEndYhjhepF2Zy4NTdEpcuGTCTZISemHEFE78G+t1/tY/TuKLHSwEFaZIx03I7tH56slUzwYwFaIpZSY8VMVPtg+NihjhugRkwBt+FIHmEJgAADbZJREFUgIju1Sa6KcddhZnsEYhgxnUxeqcpq448BUuydyMJs4S6JwlmRT2gAfLATMw0SbhpHfS9sJ6YGQKn7pa4FIliRgVW3HmRCWZcwSQcHEGP+SEkiSt6vDvNHlPdEmbuGREOwxRRUCMcT0mYCXdJ7ph+7WH82q97nCZjJpw16ZjZo/r1iRWeE/Vfuoj3+Wc17qh1M+IRiLllobqYok4DTl/6aDfWp6Tp8RQsyd6NWKf57YlTm9oGk04TGr9DmoQZXTfrnKlxH0JabtsB+3fs2JpnipkhcOpuiUuRMFlcYMWdF5lgxhVMLnFNyYtBOFxjih6DAoqlScZM21U7IKA5Wz4mrpg4NSxAm4yZ8GSlHQ2ELGQ0w+mQhJnurElgpiUF4nPJBEQ050w39VXnzLvpLYDuEYhgJnQx515BYRBZ0NA1PZ6CJdm7kXgL4I8zfYoZec0Q5f0WoP1fX0EfQvah4pdMTDAzBE7dLXHpEl1nYQIr7gPKGzOhYNKvIaNsrugxKqCoXyMZs2uvr8rKOt6WtubEFZOghgVowIy7S8JlIhcQHWwzYsadNUmYwWzAoQnRFck5000t+92u3qp3mhUwM08gR7HanVFTXap4FGZVh1lu4yV5Bd9V7q83KswUZp4GnRnvj5UpzJQpzJQpzJQpU5gpU5gpU5gpU6YwU6YwU6asUjGzT9lA/3AfKoR/1pf6e3buNRu0vfPIfoXsQT+Lwye83+8WHJDfzeCvPS+e7vvP5BUM+ZHeZZ+y/Pbe8biXTi30SLnI5b5Bl0vLnHvHpNAJibgde83w0IWYrYJFoQPxD3GGZXlm6ITNjjIODw+dkMyTToo1e0j5NiqunU5DlQuaBbdjUljZrzrdP9jKF4mBhveXO6oWM/uIn7GOS/WCUxBx2fn+j565dy/e4HXlvnkSHms34EXR5cPMPvHnCmE2q9Aj5bfCDC7RtodtPr01fA+kNvLA6UWjciAB0Uf2T4YnhJ+hdiZ84dn94RtcpR3eF3bywo6wHzWpWCsPs5SwPY6ySyU7ovD2MXNWOWaXhmB5DCoVs+3zXNmslZBy73zHG7O1EmbOBRvKmSDEjKSl0gxSfivM+CXOxfNcKSEnCePavlFXoF4H/SzOsAdrng3DyS/lsHMBPB1ufpgVa6Vhtm/UrSBKGVUBzKqw09y8ZsDAzY6UAaHUQn7cN33/5NDoHE3bOxlaf3w6SXP6I2mZoCOZsA5yXzAzFO4qmAh3zNNOwIVwER7bdnMBHJpaWLAIeh5rwWQaKBQ6HCA90NLl0Cctp60b/B06IX/H8IHwfTsyDP8AZjwtGzSeFPvEw4tC95B4sF9iYRUsGjBwoVVz4kchTa2LFqBNREdTznNJMMNOEZPrXjMJIkHu8BIty0FggTRg4tbCv3msptkZCk7BCGy/LsG/5ocZZqwlJlnZkz3k8EwW5XBSsuKOFCxc5JllLXvIT5PD8pEAXtDsYpqd7ZhgekzUEs8OLYa1eEU6z9rSDWtCl2s7aMmQ6rGRgL+FNpZjlhKWDwWPj9TaeTffX+6iSWBlW3mYhSVCG59On5wULPR9odFns96fbi0Yssdx4oCWEoGDgZSpVvuIfM05Bc4txid8eR72DMUTAb3s8MQLa0ZBn7rcmrXT5l46D8s5+uyJARvoeTzifB9vhK5oadjyvBPhtHFZGpasrR2+0HoGrpIwo2nBW3lS7FMGLlyXuTcsMe/EpA0u5ztwdH26E9JwcfE8x9rpmVnrC2lqOWYiOpJyyGUk5BI6OMDMvTTy8IWtkPZ9kTnajrCTDnoJrc2JG0jy9WZvbWQhP8M6pZAfpU7A5PBeCPZMBN/QS7KSHTrhCKSm0L00OtO1I/IK
b/9c2trQ5S6IUeNZyw4fvnCnFQjgBc07M5Yd0sJSzFjRiOzQYqDtHcuaa+nwCeuO7I047DoRns6qhwS8YJCOmX3iHuh0QvFZ3ONEzDAJ11nZVmqnaYfCkjDDf6CzyB6kDzZxDLZvHrYG6bToXPRWghF+yR7yI/xN9Du0nlykWRaYQXdC41kKueNtwNLpLlI/tBExw4wlxT4FGij7O9hEwHO3LyyfdiBXSDdPS0NKLWAmoqOjR97BOSBA4JHUsQM79+wh+dIA0w1zFZY2htkZNvZyi1lMCql6mhbTw+7FoaFhy60GzJBB+CsbpzdYrRSUyEL7xHXw+E7cI7KWHbqBFpwoaNbXs+xImLGi4dm5yaAgmPGsLSVDzA2k47DT6hEBc8www9kRi4Av6PERM0wCK9v8ysVsigGzUfBPQfge5+LQhQccfMQESdijURhY7rF7KOZPczF0AWtDo3daOWY44plu45iRTojghGjxSwhmEBheYI4ZSwpJZTHpl6BkabemrR04DCwi/8yACTszNSm1gBmPzoUp540Pwe9yCim87ZGF5JHHWMklWOBrp+ZohtasYOJmlzijpWBHLrdmZofdiyccubB/0jwHOcwww7EZMJYSiikeuMc+EccR2UN+zo64PiQdCl1kjY7igABe0ORSvfSw+undvGhEdmgxSK0ZhIAFXDBiAkQ6fDmrHlGDEAlNItyxfV7K1MLt2Algp3lSL1t6RdVhlo2P8YlFoWzmAS1Z9ijCgZWNVxa9C0MshtmJRcOHh8D1WVsnTS2k9VSwdfbw4ZWDWbYnZmspZlAsNA9ZWyfjLF+k1oAZtsHmmDkXDB9OBkr7aDvgXhuZw0Zl7F/oEuUz2oW8PAdNBRlYmR7ODkfetkNB4mEPzHjzkJWXlwc94Ybty6El3T7PYYYZL2i81IAZPcSLRmSHFQPBjGcNC5iGTcsJqkfGjCaxYMTPC/bAjBhmawIzUbbkiqrDLIX2QayRxqWMtdiuk4ccLk0JS7SK1mx75BFWUFiWhKGCicvz5NaMdZpTNpSK2b5RpWEGSaGpZD3L5X20tljfKc2XWWpFpzllg0YXYWinCfdjpxnOOs0UNnFm6zQcmpRBJ9lqCmTCQBm9mnROGL75YdqsbeedmwGzAlHjtLWPhpj3Rb+/Qc+awIwXtNxpTjF0mqxoRHY0fkUhDjZp1rCAER2x2jHkZxJUitRpApObh+S7l26GObHATCrbSu40wzefyEwJ23PIui9kc17WZGi+Vlnda6bbUiJhfjC10D4infQiy60XF4X8jNk9M2m5yz5l3unMpZDHvdDJrs/U9kZchlHz6UM4Crm4YLoNqivrLJkCTInOzFqDUwCG2d7IfBkzVzY0hycmMczC92RZCWYsKRQjMQXAUeyaRJgJZGZtjf11TbLjTPiPLLUYLE4BeHR0EcY+ZSp8xWedTgGycPqSPSl04MDok+wS99qwbw8dOnTW4V4ceeT0olH5WsHk6CNwxCrO8AWy5af3D4AxmPlh54IJRyCVfP2JFCvHDMA8fOHEp7wGswdEXNEKJuEDLaYAHDNe0IwEnh0JM1Y0PDtuWgwAyOFDVp41LGA4sjnvxKJkVj0FExfmHZscImGmbR+Io+6B0GIKzFjZWisdM/camBbDAGdCzr6wdWQyC3Pa0OhMsqCxBtcWSKr2wiz6pxE/OxcMfHfzGnhM9g4YuPnM5OHvHn5/D59ZZ08eGPfrmuHDFu6HlhfO08kbzpB5O04wY8hxzNw7YMq+n2KGaUkmmLGksKHQDrGgEYpLGCTIZLYWwFKLwdIFDRrdmg00l9/OxMUZfUEjGTBZmHchCx4Fekk2XUSBaiRvAWCQtpgtq+hnpOV+m1bKYVx7gD+s4v0ClAnHjEa92SHKHsrFuQD7J541gRkvaLGgQbMjYcaKhmeHL4nApDH6Cs8awQzPDIyz8urBgPfLrRnkZA9OMJa7dMx42VbhO03SHBtZHJHvXLDn3/D2zDsp5X2KRpTa
+BePwN5t39SbI/LvxBeLZRUNz1qp7168131/+1fnFa7b3xFmZZhzQXSeljV5g3ZnWllFc8usKcx+u6SQ3i3RcRdidsus/R4wU6ZMYaZMYaZMYaZMmcJMmcJMmTKFmTKFmTKFWbmMyZT43nbUEOE7yL0R6Z6KGqNiSjYz+YAyhZluXKakYzZLYOahqDEqphRmCjOfTciUZKUO1VV6KWqMiimF2b2OGRHbpAt9zL7orZMGJp+ZjJtqsgf9RLfPeMqUsoccXkROrJ1uQ8xSBl0mippviFSV7DAxKKb2QEiTB26+SOQ9CrN7EDOyK25/oi73Cd1s2ztwVubFBfMc2aETDl9YHJbvJVMiSp01YTkSZmTXJtOv7NE8FVPZodHWgonDTrrXRBYqzO5BzIgwhn3gVl988U93Xk8tJNvayRcPmRLZiIxCGw/MiNyMKlaNiinsZd2LoZVLGZSuMLsHMWMqCF3uA5gRRIAWMgRjUjHDxn5+wguz7HAqh/XUGBDMcA8tfFOYKcxMMJuy3OUhU3LxE16YQXuYPSRdU5gpzIyWTfWuQh/jhVnBiD1eMiV+wgszLSVsEd1irjBTmMlTgMU4Bdh2k8t9DJiFRudlLYq84iVTyg5dSE4YMENFDdyK8oW9kflGxZTCTC1oDBi4MF3oYwyYhX87CX8NxO0pU8oe8g39mRAJM1TU4BGU3uwdlWNUTCnM7nXMyupQh5TbTa34KRxlCrOqw4xNAJQpzKoSs7XTbaoaFGbKlCnMlCnMlClTmClTmClTmClTpjBTpjBTpjBTpkxhpkxhpkyZwkyZwkyZwkyZsorZ/wGcjPUbx3f96QAAAABJRU5ErkJggg==)" ], "metadata": { "id": "KqtbguMdpsSO" } }, { "cell_type": "markdown", "source": [ "# Search-Summarize-Translate-Notify\n", "\n", "One more example to really drive this home. Let's do the same as the last example and add a translate to French step." 
], "metadata": { "id": "JNL1UX9aqC2z" } }, { "cell_type": "code", "source": [ "from txtai.app import Application\n", "\n", "workflow = \"\"\"\n", "writable: true\n", "\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", " content: true\n", "\n", "summary:\n", " path: sshleifer/distilbart-cnn-12-6\n", "\n", "tabular:\n", " idcolumn: url\n", " textcolumns:\n", " - title\n", "\n", "textractor:\n", " join: true\n", " minlength: 100\n", " paragraphs: true\n", "\n", "translation:\n", "\n", "__main__.Slack:\n", "\n", "workflow:\n", " index:\n", " schedule:\n", " cron: \"* * * * * 0/5\"\n", " elements:\n", " - front_page\n", " iterations: 1\n", " tasks:\n", " - batch: false\n", " extract:\n", " - hits\n", " method: get\n", " params:\n", " tags: null\n", " task: service\n", " url: https://hn.algolia.com/api/v1/search?hitsPerPage=50\n", " - action: tabular\n", " - action: upsert\n", " alert:\n", " schedule:\n", " cron: 0/1 * * * *\n", " elements:\n", " - select id url, id title from txtai where similar('software development library') and score >= 0.4 and id like 'http%'\n", " iterations: 1\n", " tasks:\n", " - action: search\n", " - action: tabular\n", " - action: textractor\n", " - action: summary\n", " - action: translation\n", " args:\n", " - fr\n", " - action: __main__.Slack\n", " unpack: false\n", "\"\"\"\n", "\n", "app = Application(workflow)\n", "app.wait()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nooA9cFNqNhb", "outputId": "febdcfe8-34ce-4952-fb94-ccb5c200ec35" }, "execution_count": 9, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "2022-02-10 17:25:04,448 [INFO] schedule: 'index' scheduler started with schedule * * * * * 0/5\n", "2022-02-10 17:25:04,449 [INFO] schedule: 'alert' scheduler started with schedule 0/1 * * * *\n", "2022-02-10 17:25:04,451 [INFO] schedule: 'index' next run scheduled for 2022-02-10T17:25:05+00:00\n", "2022-02-10 17:25:04,457 [INFO] schedule: 'alert' next run 
scheduled for 2022-02-10T17:26:00+00:00\n", "2022-02-10 17:25:05,357 [INFO] schedule: 'index' max iterations (1) reached\n", "2022-02-10 17:26:08,125 [INFO] __call__: Sending alert: ('https://datastation.multiprocess.io/blog/2022-02-08-the-world-of-postgresql-wire-compatibility.html', \"Chaque base de données serveur-client a un protocole filaire. Un protocole filaire est le format pour les interactions entre un serveur de base de données et ses clients. Il n'inclut PAS le langage de requête réel lui-même, et encore moins la sémantique de la base de données.\", None)\n", "2022-02-10 17:26:08,310 [INFO] schedule: 'alert' max iterations (1) reached\n" ] } ] }, { "cell_type": "markdown", "source": [ "And just like before, Slack has a summary and a link but this time in French!\n", "\n", "![](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAm8AAACVCAMAAAA5fu3sAAAC/VBMVEUzFCIzGAwaHT4gHiEhICkfIDQtHiU4Hx8tIyEkJjE0JCAtKSgqJlpRHx8qKi03I1UjKGUkLD0rLCosKks9JUk8JVA/JkEiLVxGJEw6JmJMJDhKJENQJC5FKSckLXQqMy4pMzk2MjpUJ1VDMSZoJiwnN1MoOk0mN35dLiBcKlM7NzNtKSUkQjMqPm9jMi0kP4Q+PUFVOSp3MSV4Mh8pRY1rN1I0SGceUztzPSYaVG1gRTZ6PEWEPCJVTCplRzJKUFcnVqF9RiRMUWNYUEqQRSA6V44VY4laWCxDWnx1UTqTTB4VcEaHTjYfZaUjY65AXok3YZUTcLIabcEkbLeeVShBabVTa148baWiWSI7bbIceawaec17Y3M2dKlnanBTbo9tamhTbLJ0bi2dYjZcbaiMZ0mIZ2KCalh5aJEyesYnfsCtYiivYx9vcKtSeq1YeaVsc6RydKAvhtMAn1mEeywZj9m7by1ShLoxjct8eqmweDVOisy0dkdeiLRIjdRLj7lxhbOIh2CUiS+Kgqgin+WogWmvgF1DmtG1hFKAja+rhILJgTsEu2REndyHkJtjmM5UneSejqdonNptnMiDnompkqKjnF7WkEY/su5cq/BNsuN2tEZmrenHmmp3qtZ/q7G1qC/gmlC5oK0A33Kuo7mBsM7XoGPEo4rJo32VrtNtuPhRwfalrb21rKJvvOakto3UqnZ1vfLrplvFr4TOra+MvubCsr7pr2yAyPr2s2Vu0vyAz/DdubfZxjfkvI3ovISG0fqdzPHPwcfGxc2S1Oajz+exzeP5wXTHzqjowr35xH3fyaiW2fqE4f7rzJyV4Py82eDzy8D6z4T50o7s0srS2d3c2MeZ7f6l6v655fvH4vWu6f763q794Jrh4+D53c/E7fvd5+j048T647jt5dva6fO39v/559W++v7I+/7g9+Te9v7/867S+/3+8svc/P/+97/+9dni/fT++cn8+9D7+P3p/v3s/vf7/Nr/+uTw/f///Oz+/vX6//////8NsLZZAAAAAWJLR0QAiAUdSAAAAAlwSFlzAAALEwAACxMBAJqcGAAAAAd0SU1FB+YCChEoFTTQYj0AAAAZdEVYdENvbW1lbnQAQ3JlYX
RlZCB3aXRoIEdJTVBXgQ4XAAAgAElEQVR42u2dCXhV1bXHgVxCSCCVONAiCFYBH4iIWJE+oNiiYjFoCBRBEWSwJlSMTIYKSCUMEQGR5jE8plBQEYqATAKPGfwwKCEQIEAGIAMQQhKVyJ3O99Zaezj73CGDSTTYvfzk3pxz9rD2/t29zz53/++qZWjT9tNZLd0E2jRv2jRv2rRp3rRp3rRp07xp07xp07xp06Z506Z506ZN86btFuEtdUhYqLSw0JCwkJCwsLCQUHzB/4ek/uhiXBm+zaV74D+Wt1SgCvgKA9QAsjBmDLrQwBDkLvRHA5fhz3QP/MfyNiQ0zFa3rs0WarPVtcFIR6gheLVqBTYPDGwOGA4R1xZ1rLsRh63xAVFua4bXn719n+Fc2sH2oVvzpq0U3gC3ritWjB377rvvrrDZwvhQFxJWq9OY93u9/2AtfG/yFtD4qCdvl16d6za+qhcw2dgWbPvjFj2+aSudt7orhj4d8nTXrmFDV9TB2zaYQQG3MWN6PQjM9aoVZuEtoH2WB2+bguCP4umD0+Bwb4fXfHpqRExyRvb0P/3h5TUXNsC/8cmat/9k3sJC6o4F3N4d2ubpkBW2QG61Or3/YKfATs3hJTAsROUtYEA+8eba3S2oUc9DxcOCA8jqHHwB/rX9JRhGwJt/tn3g5rztfvnV1cDbpIzPX1q9ISY56a/zVd6cn8QUGIb94yFD4vPxb/mueNoWQnrPy0OGzDnsPv/SPrp86ZQS3Xu3Nm9hdd/t2hVwG9um6/+O4fb+mH/16tW8U6denXrhAKfcv/2lnu1tN/BmbKtn+0OHgMbnPukQ0HLIkL/Wq7N39+MBLV9e3iVgtDul3m3n+Ph2avr8pZMuIG8XX10GvGXMfOOCyVvxtDkTgLcN8x326cvwgHjnXDqfDZV7pjiM8y+lnX81phD++maI5u2W58029umxQ21juz7d9X8f7IT24IMPNu8V+CDi1qnXGCtv68cH1VkPvH3fJSDKcR3+cY7EyTW9YZ29bD5NDGp9ZRac47wlvZac9NIhZXx7VR3fLh4+/xrwdhJGtA0fupbOd7N3bmP3lHxD8lY8bfv5qR8vg1n7vc80b7+A8W3o0LpDnx769NPvNquFFgj/9WoOYxuA18mDt0+L/xzQuGtA1LWGtk8N16yAR6968Fb0SN3lHescFfdvS2E+/NN8//dvxBuOdO/tMy6eFe/OvzqHT7DAm+ub19LOT7j5UpqxZ/55zdutz9vQsbanhw7tOtS2oi4uT5s3D2se2OlfD+LwhguG5hbejPOPw31a1LUHkLfxAd09eQMGmwf3LuG8XYSbt4wNbybD+AaG86nH+pTzZn9vC1+B4DsY6RyuT6Y46P7t5SFzzhrnJxTumXJ9Xpbm7dZfn9YZOzZsKNy/2d5dUYe+X8AncLXGvN8LFgxjxljXp8Cb8VWzAGU+dY0MiFR5M9IfCKCndMTbJ28CYUmvri6Dt2KJG70rfg/Gx/N4Z0fzKV42obB42pBlbs3brc9bqO3d/1uxYsW7K/6vDT7wZd8vBAbi47d/jakV6sWbKzGQrxfuC2icBuNZo8FpCm/O8QHtC8r9/I14u/7ePsQN5lP2zrV0mRzfJG/GpamFhubtlp9PQ0NCbfVrPzYwMKwO4Ma+zCLgYK3QqxZ+zRVq5c2wTwwSz0PSDOPSM4Gt5iq82V+wfWiUk7fv3xvy8pA1N6fDTd7LU35YOt/O3pUUywckCm/0qnm79ce3sJDQ2p0H1qbvT8X4Bv/BqqFWGJ2tUN4p9W7L4m/19/XafPAWGtKceAvB/SG4KYQYDGseRu/hr4pkbR9pm+zWLazN33yK245COW9sqwgf39j39gBgqG4wbVX3PASI4uNbmOCNkEPiQok43WDaqmw+ZePbU0/RfMp3wIXQYxG2VwRedYNpq8L5NFTwZh3fQkLYnZzmTVtV8hYS5os31XSDaavi9YLmTdtPP76x/zRv2qqPN3ygC7x1rt
08NESPb9qqf3wLDQpuMWNg89rB/LsFzZu2auQtNCjsqRkDB84Y2CI4NMQncObV9k9+H1T/sS2Ookc+KHdhrr9FlusLhwXia/4yjqm27fZ9Vd4433X5wEhsfM7ruH1zVmWyLTV5+t17rQd2PxPUAKqw6Z7L4oivKqF9eQclPdH00/LUImlfxSuhHD/RZOOPKcLyvDckaGCrzmEhtQfGEnCljW/Fw+oOXvfvEYFLaghv6b/bYlQPb+28O/fa745VJttSk3t29Ymmk3JuGFbe2lUBb/YXFlW8EhXjzVcRCm/4XVbnzq06B3VuNbCznzWqJGfB7dS/2TVmfKuOb2qRN5/d8dtK8VZqcs+u3tT6Cns1efNnFeLt+y6LKl4Jr3GvwkVYeXuqc2jnep3hn9jaYT4XqOLaokeiHOLdOyOCW6KkYPp9tufSDOP8M0Et//nbM/a+UW7j5p/hn93dgloyzYvrb+FL72s0Kt9wffL7gMdgsP3mmaBW80uMSyOC8SgbubvZeo4AtnwdwyIwWeLzS++rD2Wl/OazbpSPc2mHgMe2uMWrawP7E+w6ZPNGlnF9+n31e7IEt/0PSnjsI6McvIgvf3UG+uo3Z4r+e/mzYn/opRFBkAx523RPrkhddP9y8G2Ze1twQICN+lT4jJ3Nc3GNfGd6cKO3qOb4HvwtZOWe4dXnycH3RpPyWR499xlOqHPLZQ7qUuEO2KwglLzZPiDeIAm2JFSJ1VW5zsqbWgkjMfzzDlhLloZXIv0B3JvtxiYYnMYbcJm7rErg8YsjqAvTm+yFUiYGT1b619r+vIjSeBvYonNoq6dahT4Vi3uSSuEtpZb4DBV1bLn61MwGR2GUP5z0RO+Soo7PJ198ooHJ24m7ljm+eWgJ463upNTPm012p9yx+tSw1rnfdXnrQtKqkqLH38q5OIwBDCP3haTH2xf4OmYf2e7QqZnQqIm255OTHo9ypDRrd/jUMAAxsd0h14Y7jorX9LuXXUhag566xj+ZfDEhyz7yybP2mY8WpDR5eHB8UVuoTHqT7aIIydsj9w6eQ8IJ4/rjzyefem8f502klq5+decxMbORzxbe0McNTbm/9bm/WO5VUX1KzpwaUIJOZX+8xki8fXXGhmaz3dilwh0qAoYWVo/LRf89KXUppEfesK5Z6nVW3pRKGIkB2FrdCylNsqjEtbaLuJ/D2mdR6UlTSsqqBBy3930+eXfDD4i3vzV/bM4+pX892p+KKHV8G/hUcIunWrXo7PWdvSdvX961XY50MOdQztC/ifdc3tTgDDW+4M01MhKgSexeQu1PsgYABG4GTty999r9SxCKTe1htE75TS7x8WghzZ2+jgEjeFMQ6UD38UAKDu9f/upY0SPgrnPk5O/461e/OiZvIQYgsid+De1WdP/2lHofuiE/qEyiWYTkrWNvsYVz021nxHwKnStSS1clb9xnK2/go/0F+lxLf6lcWX1MTpUwvoK+YfdB33XBvVsLWl+BLhXuuD14w1Yr6khVorparvPgzawEVBBacRu0EqaRlaAuIz/h88dKL7sSNJ9CC47s7SDegFSn0r9F1vYvk7fgVgPrP9UqqHPo4ha1xc8jlTm+YScUtWU3Ol/cc3kWwqHw9n2PFg8//HCb7oXy/g1nKINGGOeswJ5r8l2z6uMVyKlxswe20YL23/o4VvAFUTDr0QLiDf5JwY9fyp1H0xtSGZPF601YzKxhI/y2Zo/FnzW+sOHxezdSAkhx7PseHxiiCHN8+wALCwhoXTCLqit4E6mlqyZv5HOulTfwETrBrfpL5crqY/Lve4yGU9fu/5QdNdKbInZfNjgDXSrc8OKNGmMyG9+gKpbrPHgzK0E50HiOaWQlEAYX+QmddI1KL7sS4v5tVvcS4g0KUPvXo/3L4A2/XAiNi+s8sMVif8Obz/s33gnFH/++TRtv3qCB3Nb1AnzenB//tU2buvBJuzitw6PQu3Jr+E3qBmDL1zH/vDVlo614NZxJI+pFsw
pe/Lhb46OcCYPx9n2PySlAhyjCwpsrOzU11e3BG0/tzRvzuTK8LfLFm3DDk7ekv/6pTZDCm3qduVAA7Lx5O3F3abyl++DNVyXo/m3an9oEm7yp/evR/mXxBsAF1+48cCCsTmkbUinjm2sWW59mnhWdcLPv82fdML4l3pZLje98gc+n41nfSd5wJpnV7rCDRnZA4NfH+OxFFR2JwgeYgXwds86nCm98OjDEKzVR+yvyDnMRn7Q4b3BuJkwtoogUpEfw5ms+Fam9eOM+56q5ePGGDUDl+ppP7zx6wns+Vd1QefvqjtU56vhmuY7y6J2PK6HWV7x5A48Yox7z6Tny6LuOXvOpz0oghw9NylHHN7V/Pdq/DN6G0G+81a5dO7h2mJ/HvfL3uIzrPfD527R6s0Un4F148UTonIajUpO6wUS4oF2afWY9WC80nZR6atqHrP0br87e0GQJzvnOj+7afj3hrOsjXDY8n5z9yfAS9th2vWv3A+0LfB2jW2u8Yfbkzdh0++oLSa/uE6/n43OKh9HNmP3jwz/sabKXUn7+WhbnLb1h6EZsHVZEUdu3Hd88fpvKW9EjsOiZtl2uF1hqyduJJvN34cJC+ExJeC5W3pi/jHNZfUp+osk7F3CtQUc/mevGW/XdTT+kW3XhjgdvcM/p2vOQyZu4zj5xAOvybfV6rvv3CFhjW3mrO/9C0gNR7MmVrERRx/6nDheJ9cICKP3zmMKyKgHH8XN16QmTN0PpX4/2pyL885ZK39ezH7f08/1pqp/vF6ATXB/d93DPz+AGf0+H+s99DpNL8bCgVvP/DoMcLvwHp/HnAyNo7fzN71v8cfkj28XzDViV1+/JPLPPhNX2hkcLfB6j62GJ7sUbrsNbTcoXr5iWPaYQC3xKuczBeXOOpOdJoog9HWw9P/utyhs+1Gn0xlnxPISnNm8dJgZitQ3pMxrPxcrbc8xfVq6oPkuOqrY3YDy6Pj0YH7ZQ1eebjyLQHQ/eLj177x8m/V3hjV93s68YYqCp6/eEEqy8Nf5HMD6UYWlEJVwfNWu1zE1NAC1lX0pHy6oEHC+eeO/Dg5cqvCn969H+rAi/vBmpL7OtlaRX8DG+lf/3VOlmRlt5n29XryW2vlIjvz+tQtO8ad40b5q3Xypv2rRp3rRp3rRp3rRp07xp07xp06Z506Z506atErx57ezf08FWtl7HuL6q3D9K6XO/vn1k94KKZOY2jPMPvV2hZ64VqOKPNg8tRjkfyPp6iF7Wg/UTTZeU6n3SvnL3hqXtS2mtTQ3OuMZ7hBeqct6uPfRWTjm6NaUM0Us5eCssf2ZFqIM6/9CHFeKtAlWs+bwV95jrKO18qSItb94Ky9NaPwlvKXeWS7H0RSV5q1hmP0pG9cUviTfcgV6alSrSKk9vfPFT8mYKc1ByVPdTsfmH1EiXce9Ru6yl96FMiOmYXKjzoZ2MUrgkVErX7v+Udlcbib033EefI/SQ66LsM+kF043vXcJFWSIzVgkmFRKyL5I8HdrG4nnZFslShMBK6MLQuEiLS75YrrmewiOxWVkqw+ggq5bQJGGav9Ae0sT2Bfygt1u+9GeJrT/rRjIvrr+yasrIUPT2D2DL1zG12dF96aVwQCitFK/ZGVNB5eGLqW/DK1tNyofewLYXtbb2gZJvdfKmCnNwE5qUGJEa6du/3b7FObHF8Hy4hZA6JvHBlsIloVIyO6ZFyzkkPaARnOmiNjU+lP3xPsmbEGVRZrwSTCrEZV9C8kTbcHGTqShFCKy4LozmXC7SEpIvytVLeGTyJi7DGrJqcU0SpUl+IdJBEgN+0NstX/ozlmn3Qq6/8tCUUS1R9DaswRlfx8xm5w0jZWTCAV4ZxWtxRu7Atfpi6ttI6Zw0VfAmam3pAyXf6uRNFV4Rb3I3MlNB4d4IVHAUY19xHZPJGxcuCZWS2TE8ABLyJnRR5vRDvHFRFmUm1EHfolTI4LIvsf9b8Ca1UEJgxXVhVAwXaQnJF+fNQ3hk8i
Yuk7Oi0CRRGtqIDvcW4qCXW770ZyxT9Ijprzw0ZbwPae70dcxsdqlWY14KB76TlRFeS9ckb1ZfTH2b2C3OeRO1tvSB0prVyZsqzCHepNqCdrMSb3hbJ2RpuM/f5I0JSa4KlZLZMeJeBjwUuqj0B1oOPmzyJjb1YmaiEt/KnT/pTbZzCYrkTWqhxAZdpgujDIVIS2wZ5rx5CI+svPE6smoJTRJzqajtEtes3iXioJdbvvRnUkDFLpjsoSnDo7NwSAO2fB2zNjs0jPBSOHCNV8b0WromebP6YurbWNMJ3n4Qtbb0gZlvtfKmCnPK4k3omPzxtsg3b1wXZdg/foYJ3714E5VgGXLZlz/eFpnaRdKFUa9xkZY3b6p/Pnlj1brGNUmsBq5Z3YvuXyKFSj5489afCQHVXq6/cls1ZfSp4KI3X8f88iYcMJVWwmvpmqlosfhi6ts8eeO1tvSB0prVOp+OV59NWOZTT96kjsnCG2TwrVApFaHjnrylmz+JIgQBnryJSrAMuezrhO/51OwJqvGvac4QIi1v3lT/hNjMyhtV6/suKvFQsX+0zpVCJS+3fOnPpICK66/chkVTRlfcw0Rvvo5Zm13hTTigqqaY19I1VUGl+ELZk77NNdI6nwpJm5U32ZrVu15QhDlyvYDqHi/epI4psfHBrfmGIlySKqW+T+ZfHGaz8iZ0UbvX5F96/AOv8Y0y45UQAhGSfQnJ04km60/m03qBlyJ6guvCmCNMpCVBwly/9RIecbGZhTdeLa5J4rzd7BGGI4JQTnm55Ut/JgRUXH/loSmjIarhW44k/JEMH8esza7wJhwQlVG8Fq5JBZXVF1PfZqQ0gfXNS2mcN1FrSx/cZPmeuHd7NT8PUYQ5xJtQ93jPp0LHdOnZ+k9mGYpwSaqUzj8T8NjyLlbehC5K/LKLB28sM1YJ1ttc9iUkT8UTg1rupY8wL0X0hNCF8echKNKSIFGuXsIjLjaz8MarxTVJYu84WxkIcZOXW770Z4lPsodDXH/lsmrK+Nc39Z/77L/O+DxmaXaVN+EAr4zqNT8jFVRWX0x9m+Ha3Q0yL1Geh2CtLX1wleX71V0bq4+3SlrN2Niv7T/l+3rNm+ZN86btl8qbNs2bNm2aN22aN23aNG/aNG/atGnetGnetGneymE3eyzypdtRt7vLaFHpDQMCbI/RBu30hmzbgfJ1oIciopSv4MoyseGuvJdPvPdtRym6LDUwl6iV6p+faFbXn220sZwVqFw0Lg8retyXHs1vc5Yl4DJI5Hazb+/CCre7T81DlfBWWBpvMlpUesO5qadmBmJzLGhBPzDqHH/76n9Pa3Du5+TN9dHwq89udPvVZVkCc/nkzWc0K/vEuedfOlquGlQyGleleCtLwIXblBbhdoUBNYO34r4+YzWpRUmVVDr+XLtrPP0U7DtdaAtkW5Oxn2184+ZXl+U2Suet0lbJaFzl+1T5a84bZaUst4SrWnkTGiDXxL2koxCSIB6HCYoSW1vNYFPEm7HpttPGl3ceW4Absa61Nff3eSi+sIG43EoGg4LZNyAg4J7LPIIT7tsJCKizVw3gZF96X6PBT1iDdlnFVru72Z77n0cLRLgELkmyqr3MZJ6BuUze5NbdTWLPbeLzS5u13LKHYlUpAbs6NJp76ZkgNcqXGDnoT8NsIFNrxoNfsSheBg9yZa0R7wRb/6vTKXyZ2FJEMQ25iszUWSnNSc7xE9xdteYejU4SLqbiKiUwmtnuiuxMifhVed6EBojtSTODaFEcJofaHzI4BhvfZrW+4oQZmHZBO2fZBh92mLypobaggbjcSgaDSmyX5pzY+JCTR3BKuWOLaw+ksARwIq2SR9AuVWwl9FGSNxEmSlV7yWSHPQJzlc5b3bk/bLv3ybRL0O4yYFfT/vknmv5xn33kowWGR7gr8adsIKk14/7yKF4iyJW1RtQJ7dKuP/Hw3B8+anBMatCIN64iU+OIyeakrmAnpIBLqblno9P+QRy6Sg
uMJttdlZ0pEb8qz5uMmsV443+Krct+eXNuaDbZnd5wCXiA+0ftn3RjSkkvxRf1LJNbyWBQ48HREyhpYhGcNkEXFrXdqOqovsNpGi61Bu1SxFY/CH2UGVSGSZIsai+ZzDMwV+m8wZ0C3SHMerRABuwCMsBVB2789gh3Jf8UDWRqzZi/Yq+7CHJlqZFbdMICLPX+T6UGjXjjKrIvFZ2VbE78k52Q7qo192x0wZtRWmA00e5qKDQ14lfV8MaUAJI3+JPHYfLDG6xPA+rCPJGIDbipATt8cVidvZw3S6gt3rPpTbaLveLGJhjfSCVikM7lxN37XHt+dU5NRYWC315Bu8SBqyIWl+BNSJIsai+ZzDMwlx/errUNCIikXibBAoZeUgJ2kfYCWsEj3JX8UzSQRWuGiVhUJRl0yFIjkzdU5CkaNMkb/HNd6qyU5qTFM52Q7qo192h0yZu7tMBoot3dquzMjPjlri7e0kvnbW5q6g0KiRYaGhoSwKvB9Sseii9sIC63kq5f7/LwvSgu5xGcnLNa3As3IZYATg2Z315Bu8QBqY+SvAlJkqr2ksk8A3P54c2ZkZqaY+VNCdglebOGu5J/yuhIqtbML29KJr54W6TyZuqslOZkYymekO6qNfdsdL+8+Wp3tyo7MyN+VRtvPA4TTd1CmmS9f8M+uGtZRkZG9t/bF9hzqK5LxHyqhtoa39vN5VbS9U1sbJMRnH57hl1ppqJ6FPf1GbSrkM1ZOI1jhEge7kqKl1S1l0zmGZhL4U36Z5lPRa+pAbsEbx7hruSfooEsWjMaz8V8yoJcWWrkyZvUoFl4M6TOymxOefuIavNCr5p7NrqcT0sLjCba3VBlZ2bEr2rjTcRhQnGVkCaJYFOCN9d4Nmin3LXxy8DBB/79BCnjPRVfkKuQW0nXTzxgC23Rc5+I4HT9CRgnWy2zBOga3+6Qc2kzj6BditjK2HbX/FMbOrQvEOGuZJgoVe2lxPazBuZSeJP++eRNDdgleBOZXHqVTwI8b9lAHlozN4/iVSTXC2qNPHiTGjSVN1NnpTQnpuQnRKMoNf/Ws9GLOk7OO4tDV6mB0US7q7IzM+JX9fHG4jDRUCqkSSLYlODtWlsmsbv5594/wHJBKKA8FF/Ys1xuJVx3ThxwISNpWPerPILTtieTM07NhE+doqO6PgzW3x5BuyxiK9JHwdAqwl1JJZWq9jKTeQTmUp+/Cf988qYG7JK88UzSH2DPqUTesoFUrRklYlG8DB7kylojT96EBk3lzdRZKc2Jf4oT3F215h6N7nZ91KzRXBrTSgmMZra7IjszI35Vnrefw679DhcWiWIUt/dF2lN+8yMi2Sz4CX5zqxT7aMkt+/VnpQMH3UK83ezRP8eV9JDoLFTTw+p2gONW463otULN261gMFZLsS/7sl8GTLylxrcbhuZNm7afwjRv2jRv2jRv2rRp3rRp3rRp07xp07xp07yVy+iru7JN/X1YYWXJC5zj26X5ObXrRb4jPnMfZHHSvxwlc3SBa94OfOeaF2duMVrjJcFyrVOf/OKVmQO+Ln8zHOlz+UekstR0wA7PmgsvnRPW+nPOyI4Pj5iB+xxOTgqPXu8AR7aOCo/ei2/WDYqIlS2Dh2OPuf0epnwKLA2UEFch9UdeP+a52tDi1JTwhe4az9tXfnEzeduFvq1c6DeflWuNvKjLns2QGeX1tYL9ldNKR+L5kxXlreKpSuMNal4Wb+hcTOyhA2+MK4Tki88eGLDWbWyOXJ+6DvJyzYs+mBofKb4FgMPJ6yK/9nfYHjM7KyN+XInaAFXGm2tebJa75o9vhv+zkreV4Jtrnl/ekKLNzH21GY5785bXT+HNx/ly8FbxVKUY1rwM3tglV6C6fU6Dd8DKrj659hi41jk1zp3Z7xh4NZynpDeuhHElfg4f75NL+agNUGW82V9fW4Xz6TvTg/FHillkLI/4S3TJ9enBjf7ZcJHhV8bDJEdMiiTjUokAUVznY4bOYrwlwz
yQ5Zwazm2c/fWd8f0jNjoMJ7zMKIAWx4KOxDnsr8OHemv/8Bnx0Az73wiPPmashATQRasGhceeM1yr+sPLyf5wbIdyHqw3tP7+UTRLZfaD8qLZbsjMPgdHRSzOnoJ/Fw+HQQn/Ad7MVAmLIevZhcaucQf6jyvMjh8UMQNrjnntdLjoxW1ABhEbS1jpsrt4cazm6OUbeJZ4g/kyYkkhTHyTsBT8hOElN3KoRqfzXsHREf+94OBdnuPAqvGeZighVb4PE2+ZffiGKtZACXHrBoUvzMe5FpqUby1YCWOpcx6U7HxzrSFcA4fDF6IDsqGZMXfyhkN2cVU1n3ZsuTp75l3bRWQsa/wlQgt3HM4MXGT4kvGYkiMhveKKIqFrEjofM3QW8RY++2xGTJzDhZ9B51RofXtM5PqcrTCPrByXlb0GeFvowM/5DuP4aOj4yJ0XDvSHz/yAnTmrYNDYhR/fXdGHUqeMKzkSdcx1IA37DOc05TwegYQ5B/qvdWeGRx+6MI+NX5nhs/Pzhg885loVVaDyZqZKCF8MqRYauyKiV+9zvhl7FpLmsrw2luyKOnhj/4AzzgkL8zO2iNI5b6I4VnPyEqpYiLzBfJmTOiWuxB6z2JEXMzufX8IYiCrI7P21MpDkDeenTvbhE+dmHAcNdpWvw5BtiWvqYrEFglxBN/YPWGs4YxbmZ8+Lc0hC84YDmJn9TkvXEgbFrj7KHaCG5rlwd2jMrcr5lESILDKWNf4SFkxSBiWKk1XGIyVHXIokFEVC18R1PpetOzloptkcVaDytpDmEde82Q7lPuyKax402ASadeOojpnYLESOm6bBzZwi+qSr5+GI/U0cR6BTqKPYJQbOSK554xzY9H54g5NYu12Y6ggOGvbhawEwir9HbZ+wEI7gn5vlHAzdJYujmnMvoRTkDccVBIPm/QS8ji4heuBTxgYqOysDasdGIyYfcYsAAAkDSURBVGeMIIiVk8fuEX0dxgE+Wi4jGG/gBrSacYQqxMc+Ozo7I2aHsXl0oXDNSHgxlxwwG5ouFe5U7Xz6AW24pzvNL+7JtcZfwoK/QJEOCTl8yXhUCQhu1Rc7UoWuSeh8zNBZkje4d1F5W0ujfcnJ/tHrxc9wwEIir18um/VEMxTDnLNL9PLxPpfzYiKWpUmYlPNwpJhmKehLuuvJZDcoxBuWm+mXtziGMtVyJYyvWDib8WBsiB4EttDYGh67Jd/gpTPeZHF8CUTp8/rtAN7IRyzJPmGtI5uGL7FKgsHMbajjm2vlaMaGM342dMtmmOO/Vsc3Ooy3Ii8WyMMnByxOxmUEHb7CeUN4EuKMlRFY46hcygiOwoJ/M/yzULrGHEYHREMfhxl0rXSHqkWHqoo3GJ1EZCxr/CXk7TbGmx8Zjz/e+OZsqfORobNK4w3ufI0b697gyy/7K2dgAetg0wvxtn/+oEG9OU+udZMGDQJMXAfi4fPNeFPPVw9vLEu07HX9Rxfw0j15w5pzLzMHfK3yBgPRoOiNJQa/hG75HSJbKgNwyxK44Wf7Rk5OjkPcqJ2WhzPUw7nYcNSGdNjK22a+bKWMYLS7+srpzKirr3ztzZvZ0HBpvpU3OlRFvMH0KCNjecVfopCF1+5f5EfGo0iOVN6ErknR+fDQWaXwxnrFEPcLR8aVOCd8bbC5Fttlc/Qhhxi/nPNi06C9aQUIuRBv6nl1Pr3slzdsSd+8QWdQLdmkE7OW5WXwaZVN9yxDtg5U5tPLWHPhJaRn82kJDUTw3i2cY7jR3Ei38Lg+Bdyiz3Hc4sztw3Ih6ucw3QLYxTLCwpv7SKSydAeyl48rsb++HK4QrkneRENb59PLVTqfdmx3OHtm46M8MtZla/wlvOL7Lk/mXJwYuMjwJeOxSI5U3oSuSeh8WL72iezneQRvRsLo5MOuebHJh+wx0WkXVkXCXfxRx0kYEmC94IIbanwkijfU52DlhJ9Dh7EfhpMjkQcPX4VGcK3qc3n/lvzsN/F+fP2BLPP86d
QSsV7Yj+sFzhvcHRaqvMGdEtxKhzPeZKoEuGvGG22qpTNmdtaFVS/y9cKHBUdgXZMaf9S5Js3Y/yIvHbMV6wUszsWWArsi1+dnvBHnEOsFfEDmgrVjRPSSEn5J3qjYQ4cPH87H88kH+i92u1ZGrocDZx3OqdF4Jks+aNuZig/a/Bw+MmB9TsYc+VgOG6hE8OacEJt24cDsQvEwLQLoWRkBLEnXBG+ioZX1ArpTpbzdv/wZ/LEVF4uMddkaf4kuIf1Sj0WGfxlPmuHFm/xFOK7zYfne7NvdylvmqIjZcMsWsdgZM2MS3fCyNTjcuToyoy4bCeSpc154xPqtMIiNGhR7cMIO+IyHx+buHzUw9mC/y/icYWEhoBcefUw5PzkXyXHJ5yGCt3EFKm/45DwaUyBvMlVCbHw4Ppphz8/waQI+8sBnBdFH3VjBiCWF9DTmmMFKx2zpcYIojj2i3hW7jh5EqM9D8t4+mJNzYMAOdolrHnsIA+fxUc76Elg9k0UVbGZvRN/TFwlQup/DmDwiVv6eGDbQFcEb88BcS+Bwd7z3DsU1yZtoaJ45d6cqeasp5mPNvXKxO++V3J+hLhV8UOrji4PFfjPYTBPf6ztKuUR/X//z8PazWaV5K8WOR+503NgqHoVo3jRv1csbTcczsgzNmzZtmjdtmjdt2jRv2jRv2jRv2rRp3rRp3rRp+8l4259WvuuO9PERfGRX1JnKln8Dv9KLWSxFAPaYhQ4oC1+Yqss0+nYUzUu3Jfbka6vpvOG+2p+TN/vrp628ORlv+MJUXT5489Jtad5uFd6cUyvDW+VN6q2klEspa6Uf3o5r3mo6b1IXJQVEpD9imhx8j5uQhVCJdtdwSRJuz5m9Ls4tGKC9krT/L7Pf6eMw8b1+MD58h1AzMSM9UJZFFhSdu3VQxMIS1EKNIqUT6p7WO0hvRRt1TJHTQiwLXkjVNXoObk61k9Ius9/BKaZui3YB7TyJ+i/NW83jTeiihIAor99OR+o+dzEjZ6dxcsD6EiZU4jIfLklCKc/+/iZvKMAwjoSPA3RGFyBvMREzVmdxNRMfM9+Mzbqw5pwpC4rcaawcNLswc/gOY1d4LGmZ7DGLc7ZGfs32h2MlTJGT4M3Ndq/hXn62n1qqr5huK3wxVHhyWvabcQ7NW83jjemipIAok2nNiLcjowtwr5bBhEpc5sM0QbSvGrceS95op+LifrkoXybe4kqkmolvoY7MZbOikAVxbHCr+K4+TMuEEitMZfImRU5W3lCh6koglZtUXzHeIAHVfvPoAs1bzeON6aJ+EIIO57zwGYccrMfwxzRO9uNCcS7zOc0kSVLKAwzg5BuHKkX7K6en7rC/8rVBvK011Ux0BY2AhmGVadAu3IQ4rmXiejeoi5U3EjlZeUNs816RigRTB4gJqHD4S/NWM3lLMHkzjNT48IUOJuEGxmLh7ovJRcWvU5AkKU/hzbhA0p2VcSdHF26efRLGQcGbVDPRFSvL4g1/gyN7zVuDwj15I5GTB28wQu4aXaB5uwV5gwFFCohoxnvxCvGW1++MuUZUZT4JcTAPe8ynxvEX5wAnUXPiHJI3Vc3EJ1LDKguy8Hakz+njkTvzvcc3Ejl58GasjJ2y1tC83Wq8MV2UFBBlLstHgRncfqWmwUowIiJ6JxcqMZlPCRdErYw8mrNOXS/AgIP6oQkROwzJm8HVTHy9EBOblbFur0UWJHnrzbRMu6KuGCf7L8SfbsgoJN6kyEnyRqouBywTMC1XXBFvKEsq1LzVbN64Lko8D8mO749PEoxd/SPWn5yclXNha59cRah0lEuSmJTHMr65EnB628x+qYfzxtVM8nkI/UiZKguSvEUtZ1qm+Ii31q+CMWwV1Ip4kyInkzdSddGKwRCKK+KNdFuat5p//+bb2HcMmf1y/V9QZTv91ce65X0kPHyH7vZfEm/G5ug0d0b8bEfN5I2vFrT9YnhDUW/E4kKjRvLm71cjtdVk3rRp07xp07xp06
Z506Z506Z506ZN86ZNm+ZNm+ZNm+ZNm7afxP4fxWyxW07I+e0AAAAASUVORK5CYII=)" ], "metadata": { "id": "pSi8e8rGuAah" } }, { "cell_type": "markdown", "source": [ "# Wrapping up\n", "\n", "This notebook covered how to build workflow notifications with txtai. There are many directions one could go with this. Build an activity feed, alert when semantic events occur and more. More ideas can be found in the [txtai application](https://huggingface.co/spaces/NeuML/txtai) on Hugging Face Spaces. \n", "\n", "Everything in this notebook can also be written in Python. The benefits of YAML workflows are that they require little to no code. Work is ongoing as of txtai 4.1 to make workflows easier to containerize and ultimately run in serverless environments. Keep an eye on this!" ], "metadata": { "id": "Fr99QHPtTMJt" } } ] } ================================================ FILE: examples/29_Anatomy_of_a_txtai_index.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "kernelspec": { "name": "python3", "display_name": "Python 3", "language": "python" }, "language_info": { "name": "python", "version": "3.7.6", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "colab": { "provenance": [] } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "POWZoSJR6XzK" }, "source": [ "# Anatomy of a txtai index\n", "\n", "This notebook inspects the filesystem of a txtai embeddings index and gives an overview of the structure." ] }, { "cell_type": "markdown", "metadata": { "id": "qa_PPKVX6XzN" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." 
] }, { "cell_type": "code", "metadata": { "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "trusted": true, "_kg_hide-output": true, "id": "24q-1n5i6XzQ" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai\n", "!apt-get update && apt-get install -y file xxd" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Create index\n", "Let's first create an index to inspect. We'll use the classic txtai example.\n" ], "metadata": { "id": "0p3WCDniUths" } }, { "cell_type": "code", "metadata": { "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a", "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", "trusted": true, "id": "2j_CFGDR6Xzp", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "4c16f389-2cf0-46d9-9cb8-bdda04d06559" }, "source": [ "from txtai.embeddings import Embeddings\n", "\n", "data = [\"US tops 5 million confirmed virus cases\",\n", " \"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\",\n", " \"Beijing mobilises invasion craft along coast as Taiwan tensions escalate\",\n", " \"The National Park Service warns against sacrificing slower friends in a bear attack\",\n", " \"Maine man wins $1M from $25 lottery ticket\",\n", " \"Make huge profits without work, earn up to $100,000 a day\"]\n", "\n", "# Create embeddings index with content enabled. 
The default behavior is to only store indexed vectors.\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/nli-mpnet-base-v2\", \"content\": True, \"objects\": True})\n", "\n", "# Create an index for the list of text\n", "embeddings.index([(uid, text, None) for uid, text in enumerate(data)])\n", "\n", "# Run a search\n", "embeddings.search(\"feel good story\", 1)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': '4',\n", " 'score': 0.08329004049301147,\n", " 'text': 'Maine man wins $1M from $25 lottery ticket'}]" ] }, "metadata": {}, "execution_count": 26 } ] }, { "cell_type": "markdown", "source": [ "# Print index info\n", "\n", "Embeddings indexes have an `info` method which prints metadata about the index. This can be used to see when the index was built, what settings were used and when it was last updated." ], "metadata": { "id": "pHqeRmHtw1ui" } }, { "cell_type": "code", "source": [ "# Print metadata\n", "embeddings.info()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "o7nKY0AWxBWU", "outputId": "be7eca6e-dbbc-40c5-df1f-9726554de476" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{\n", " \"backend\": \"faiss\",\n", " \"build\": {\n", " \"create\": \"2022-03-02T15:18:41Z\",\n", " \"python\": \"3.7.12\",\n", " \"settings\": {\n", " \"components\": \"IDMap,Flat\"\n", " },\n", " \"system\": \"Linux (x86_64)\",\n", " \"txtai\": \"4.3.0\"\n", " },\n", " \"content\": \"sqlite\",\n", " \"dimensions\": 768,\n", " \"objects\": true,\n", " \"offset\": 6,\n", " \"path\": \"sentence-transformers/nli-mpnet-base-v2\",\n", " \"update\": \"2022-03-02T15:18:41Z\"\n", "}\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Save index and review file structure\n", "\n", "Next let's save the index and review the file structure. 
This section prints each file, and runs commands to show basic details about each one." ], "metadata": { "id": "BYWUFBUGyKyY" } }, { "cell_type": "code", "source": [ "# Save the index\n", "embeddings.save(\"index\")\n", "\n", "# Show basic details about index files\n", "for f in [\"config\", \"documents\", \"embeddings\"]:\n", " !ls -l \"index/{f}\"\n", " !xxd \"index/{f}\" | head -5\n", " !file \"index/{f}\"\n", " !echo\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "aPH-dnV2ZuL1", "outputId": "6d8d1329-a2e8-4538-b197-0e2959b9eef2" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "-rw-r--r-- 1 root root 295 Mar 2 15:18 index/config\n", "00000000: 8004 951c 0100 0000 0000 007d 9428 8c04 ...........}.(..\n", "00000010: 7061 7468 948c 2773 656e 7465 6e63 652d path..'sentence-\n", "00000020: 7472 616e 7366 6f72 6d65 7273 2f6e 6c69 transformers/nli\n", "00000030: 2d6d 706e 6574 2d62 6173 652d 7632 948c -mpnet-base-v2..\n", "00000040: 0763 6f6e 7465 6e74 948c 0673 716c 6974 .content...sqlit\n", "index/config: data\n", "\n", "-rw-r--r-- 1 root root 28672 Mar 2 15:18 index/documents\n", "00000000: 5351 4c69 7465 2066 6f72 6d61 7420 3300 SQLite format 3.\n", "00000010: 1000 0101 0040 2020 0000 0001 0000 0007 .....@ ........\n", "00000020: 0000 0000 0000 0000 0000 0001 0000 0004 ................\n", "00000030: 0000 0000 0000 0000 0000 0001 0000 0000 ................\n", "00000040: 0000 0000 0000 0000 0000 0000 0000 0000 ................\n", "index/documents: SQLite 3.x database, last written using SQLite version 3022000\n", "\n", "-rw-r--r-- 1 root root 18570 Mar 2 15:18 index/embeddings\n", "00000000: 4978 4d70 0003 0000 0600 0000 0000 0000 IxMp............\n", "00000010: 0000 1000 0000 0000 0000 1000 0000 0000 ................\n", "00000020: 0100 0000 0049 7846 4900 0300 0006 0000 .....IxFI.......\n", "00000030: 0000 0000 0000 0010 0000 0000 0000 0010 ................\n", "00000040: 0000 0000 0001 0000 0000 0012 0000 0000 
................\n", "index/embeddings: data\n", "\n" ] } ] }, { "cell_type": "markdown", "source": [ "The directory has three files: *config*, *documents* and *embeddings*.\n", "\n", "- config - The input configuration passed into the Embeddings object. Serialized with [Python's pickle format](https://docs.python.org/3/library/pickle.html).\n", "\n", "- documents - [SQLite](https://www.sqlite.org/index.html) database. Stores the input text content and associated data.\n", "\n", "- embeddings - The embeddings index file. This is an [Approximate Nearest Neighbor (ANN)](https://en.wikipedia.org/wiki/Nearest_neighbor_search#Approximate_nearest_neighbor) index with either [Faiss](https://github.com/facebookresearch/faiss) (default), [Hnswlib](https://github.com/nmslib/hnswlib) or [Annoy](https://github.com/spotify/annoy), depending on the settings." ], "metadata": { "id": "oH4Yd9BOlo5u" } }, { "cell_type": "markdown", "source": [ "# Config\n", "\n", "Given that the configuration file is serialized with Python pickle, it can be loaded in Python." 
], "metadata": { "id": "xO3CokBlzCfc" } }, { "cell_type": "code", "source": [ "import json\n", "import pickle\n", "\n", "with open(\"index/config\", \"rb\") as config:\n", " print(json.dumps(pickle.load(config), sort_keys=True, indent=2))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "aNQSCiXHzOTj", "outputId": "00b5ebdf-961b-45ac-d90c-e6b824c11979" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{\n", " \"backend\": \"faiss\",\n", " \"build\": {\n", " \"create\": \"2022-03-02T15:18:41Z\",\n", " \"python\": \"3.7.12\",\n", " \"settings\": {\n", " \"components\": \"IDMap,Flat\"\n", " },\n", " \"system\": \"Linux (x86_64)\",\n", " \"txtai\": \"4.3.0\"\n", " },\n", " \"content\": \"sqlite\",\n", " \"dimensions\": 768,\n", " \"objects\": true,\n", " \"offset\": 6,\n", " \"path\": \"sentence-transformers/nli-mpnet-base-v2\",\n", " \"update\": \"2022-03-02T15:18:41Z\"\n", "}\n" ] } ] }, { "cell_type": "markdown", "source": [ "Notice how this is the same output as `embeddings.info()`." ], "metadata": { "id": "_LJvaPzFzqId" } }, { "cell_type": "markdown", "source": [ "# Documents\n", "\n", "The documents file is a SQLite database with three tables, documents, objects and sections. Let's take a look inside." 
], "metadata": { "id": "i5_m92oSz3eK" } }, { "cell_type": "code", "source": [ "import pandas as pd\n", "import sqlite3\n", "\n", "from IPython.display import display, Markdown\n", "\n", "# Print details of a txtai SQLite document database\n", "def showdb(path):\n", " db = sqlite3.connect(path)\n", "\n", " display(Markdown(\"## Tables\"))\n", " df = pd.read_sql_query(\"select name FROM sqlite_master where type='table'\", db)\n", " display(df.style.hide_index())\n", "\n", " for table in df[\"name\"]:\n", " display(Markdown(f\"## {table}\"))\n", " df = pd.read_sql_query(f\"select * from {table}\", db)\n", "\n", " # Truncate large binary objects\n", " if \"object\" in df:\n", " df[\"object\"] = df[\"object\"].str.slice(0, 25)\n", "\n", " display(df.style.hide_index())\n", "\n", "showdb(\"index/documents\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 619 }, "id": "32TmOeRZ0Lec", "outputId": "895b569c-3509-4f38-c4eb-36340d718d15" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/markdown": "## Tables", "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
name
documents
objects
sections
\n" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/markdown": "## documents", "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
iddatatagsentry
\n" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/markdown": "## objects", "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idobjecttagsentry
\n" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/markdown": "## sections", "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexididtexttagsentry
00US tops 5 million confirmed virus casesNone2022-03-02 15:18:40.591760
11Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized icebergNone2022-03-02 15:18:40.591760
22Beijing mobilises invasion craft along coast as Taiwan tensions escalateNone2022-03-02 15:18:40.591760
33The National Park Service warns against sacrificing slower friends in a bear attackNone2022-03-02 15:18:40.591760
44Maine man wins $1M from $25 lottery ticketNone2022-03-02 15:18:40.591760
55Make huge profits without work, earn up to $100,000 a dayNone2022-03-02 15:18:40.591760
\n" ], "text/plain": [ "" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "source": [ "`documents` stores additional text fields as JSON, `objects` stores binary content and `sections` stores indexed text. The only table with data as of now is `sections`. `sections` stores the input (id, text, tags) elements along with internal ids and entry dates. \n", "\n", "We'll come back to `documents` and `objects`." ], "metadata": { "id": "-nmu31TQ4gSv" } }, { "cell_type": "markdown", "source": [ "# Embeddings\n", "\n", "Embeddings is the ANN index and what is queried when running similarity search. The default setting is to use Faiss. Let's inspect!" ], "metadata": { "id": "v3SsQCCD7lR7" } }, { "cell_type": "code", "source": [ "import faiss\n", "import numpy as np\n", "\n", "# Query\n", "query = \"feel good story\"\n", "\n", "# Read index\n", "index = faiss.read_index(\"index/embeddings\")\n", "print(index)\n", "print(f\"Total records: {index.ntotal}, dimensions: {index.d}\")\n", "print()\n", "\n", "# Generate query embeddings and run query\n", "queries = np.array([embeddings.transform((None, query, None))])\n", "scores, ids = index.search(queries, 1)\n", "\n", "# Lookup query result from original data array\n", "result = data[ids[0][0]]\n", "\n", "# Show results\n", "print(\"Query:\", query)\n", "print(\"Results:\", result, ids, scores)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ofIHY-pV7kWH", "outputId": "f990cc01-e235-4010-ccfd-fdbb5692cabe" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ " *' at 0x7f68631cd750> >\n", "Total records: 6, dimensions: 768\n", "\n", "Query: feel good story\n", "Results: Maine man wins $1M from $25 lottery ticket [[4]] [[0.08329004]]\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Index compression\n", "\n", "txtai normally saves index files to a directory. Indexes can also be compressed. 
Nothing is different other than the files being in a compressed file format vs a directory." ], "metadata": { "id": "s9aLt2zF2ZW2" } }, { "cell_type": "code", "source": [ "# Save index as tar.xz\n", "embeddings.save(\"index.tar.xz\")\n", "!tar -tvJf index.tar.xz\n", "!echo\n", "!xz -l index.tar.xz\n", "!echo\n", "\n", "# Reload index\n", "embeddings.load(\"index.tar.xz\")\n", "\n", "# Test search matches\n", "embeddings.search(\"feel good story\", 1)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "0oOC8ToG1pyn", "outputId": "6fa8a8a7-3831-4307-a818-a4b62f8a81e8" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "drwx------ root/root 0 2022-03-02 15:18 ./\n", "-rw-r--r-- root/root 295 2022-03-02 15:18 ./config\n", "-rw-r--r-- root/root 28672 2022-03-02 15:18 ./documents\n", "-rw-r--r-- root/root 18570 2022-03-02 15:18 ./embeddings\n", "\n", "Strms Blocks Compressed Uncompressed Ratio Check Filename\n", " 1 1 18.1 KiB 50.0 KiB 0.361 CRC64 index.tar.xz\n", "\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': '4',\n", " 'score': 0.08329004049301147,\n", " 'text': 'Maine man wins $1M from $25 lottery ticket'}]" ] }, "metadata": {}, "execution_count": 32 } ] }, { "cell_type": "markdown", "source": [ "# Content storage\n", "\n", "Let's add additional metadata and binary content to the index and see how that is stored in the SQLite database." 
], "metadata": { "id": "lGmiYXyqyjtQ" } }, { "cell_type": "code", "source": [ "import urllib\n", "\n", "from IPython.display import Image\n", "\n", "# Get an image\n", "request = urllib.request.urlopen(\"https://raw.githubusercontent.com/neuml/txtai/master/demo.gif\")\n", "\n", "# Get data\n", "data = request.read()\n", "\n", "# Upsert new record having both text and an object\n", "embeddings.upsert([(\"txtai\", {\"text\": \"txtai executes machine-learning workflows to transform data and build AI-powered semantic search applications.\", \"size\": len(data), \"object\": data}, None)])\n", "\n", "embeddings.save(\"index\")\n", "showdb(\"index/documents\")" ], "metadata": { "id": "Ef4-Gd8ZtzUF", "colab": { "base_uri": "https://localhost:8080/", "height": 713 }, "outputId": "0f290fdc-2bb7-4022-e4a0-1dc54b080bc5" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/markdown": "## Tables", "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
name
documents
objects
sections
\n" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/markdown": "## documents", "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
iddatatagsentry
txtai{\"text\": \"txtai executes machine-learning workflows to transform data and build AI-powered semantic search applications.\", \"size\": 47189}None2022-03-02 15:19:00.708223
\n" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/markdown": "## objects", "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idobjecttagsentry
txtaib'GIF89a\\x9b\\x04\\x18\\x03\\xf5\\x00\\x00\\x12\\x13\\x14\\xcc\\xcc\\xcc\\x13\\x14\\x15\\xbd\\xbd\\xbd'None2022-03-02 15:19:00.708223
\n" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/markdown": "## sections", "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "display_data", "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexididtexttagsentry
00US tops 5 million confirmed virus casesNone2022-03-02 15:18:40.591760
11Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized icebergNone2022-03-02 15:18:40.591760
22Beijing mobilises invasion craft along coast as Taiwan tensions escalateNone2022-03-02 15:18:40.591760
33The National Park Service warns against sacrificing slower friends in a bear attackNone2022-03-02 15:18:40.591760
44Maine man wins $1M from $25 lottery ticketNone2022-03-02 15:18:40.591760
55Make huge profits without work, earn up to $100,000 a dayNone2022-03-02 15:18:40.591760
6txtaitxtai executes machine-learning workflows to transform data and build AI-powered semantic search applications.None2022-03-02 15:19:00.708223
\n" ], "text/plain": [ "" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "source": [ "This section added a new record with metadata and binary content (truncated when printed here). The `documents` table enables additional fielded search with SQL. " ], "metadata": { "id": "gcgtUQnACf5c" } }, { "cell_type": "code", "source": [ "embeddings.search(\"select * from txtai where size > 0\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lz7xwroECzx2", "outputId": "3740cb3b-5904-453e-af93-5ee98c14652d" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'data': '{\"text\": \"txtai executes machine-learning workflows to transform data and build AI-powered semantic search applications.\", \"size\": 47189}',\n", " 'entry': '2022-03-02 15:19:00.708223',\n", " 'id': 'txtai',\n", " 'indexid': 6,\n", " 'object': <_io.BytesIO at 0x7f6861408a70>,\n", " 'score': None,\n", " 'tags': None,\n", " 'text': 'txtai executes machine-learning workflows to transform data and build AI-powered semantic search applications.'}]" ] }, "metadata": {}, "execution_count": 34 } ] }, { "cell_type": "markdown", "source": [ "Metadata fields can also be selected and combined with similarity queries." 
], "metadata": { "id": "9fOzYXY6DJFj" } }, { "cell_type": "code", "source": [ "embeddings.search(\"select text, size, score from txtai where similar('machine learning') and score > 0.25 and size > 0\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "DXWf90-UDM0H", "outputId": "7c31c4ea-5e2d-4873-d9cf-d9b7e6196754" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'score': 0.5479326844215393,\n", " 'size': 47189,\n", " 'text': 'txtai executes machine-learning workflows to transform data and build AI-powered semantic search applications.'}]" ] }, "metadata": {}, "execution_count": 35 } ] }, { "cell_type": "markdown", "source": [ "The `objects` table enables additional binary content to be stored alongside an embeddings index. In some cases (image search), the object content is used to build embeddings.\n", "\n", "Otherwise, it's the text field from sections. In both cases, associated binary objects are available at search time. " ], "metadata": { "id": "XvBaEBCDIUN6" } }, { "cell_type": "code", "source": [ "embeddings.search(\"select object from txtai where object is not null\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "RaJPqDV3I3sm", "outputId": "3c416f6f-2ca6-481b-dc53-193e89f7da3e" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'object': <_io.BytesIO at 0x7f6863246470>}]" ] }, "metadata": {}, "execution_count": 36 } ] }, { "cell_type": "markdown", "metadata": { "id": "aDIF3tYt6X0O" }, "source": [ "# Wrapping up\n", "\n", "This notebook gave an overview of the txtai embeddings index file format. This hopefully gives a basic understanding of the architecture and/or helps with debugging when running into issues. 
\n", "\n", "See the following links for more information.\n", "\n", "- [GitHub](https://github.com/neuml/txtai)\n", "- [Embeddings documentation](https://neuml.github.io/txtai/embeddings)" ] } ] } ================================================ FILE: examples/30_Embeddings_SQL_custom_functions.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "kernelspec": { "name": "python3", "display_name": "Python 3", "language": "python" }, "language_info": { "name": "python", "version": "3.7.6", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "colab": { "provenance": [] } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "POWZoSJR6XzK" }, "source": [ "# Embeddings SQL custom functions\n", "\n", "txtai 4.0 added support for SQL-based embeddings queries. This feature combines natural language queries for similarity with concrete filtering rules. txtai now has support for user-defined SQL functions, making this feature even more powerful." ] }, { "cell_type": "markdown", "metadata": { "id": "qa_PPKVX6XzN" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ] }, { "cell_type": "code", "metadata": { "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "trusted": true, "_kg_hide-output": true, "id": "24q-1n5i6XzQ" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline]" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Create index\n", "Let's first recap how to create an index. 
We'll use the classic txtai example.\n" ], "metadata": { "id": "0p3WCDniUths" } }, { "cell_type": "code", "metadata": { "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a", "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", "trusted": true, "id": "2j_CFGDR6Xzp", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "f2488a78-6cae-4c25-985e-fb2dd674a534" }, "source": [ "from txtai.embeddings import Embeddings\n", "\n", "data = [\"US tops 5 million confirmed virus cases\",\n", " \"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\",\n", " \"Beijing mobilises invasion craft along coast as Taiwan tensions escalate\",\n", " \"The National Park Service warns against sacrificing slower friends in a bear attack\",\n", " \"Maine man wins $1M from $25 lottery ticket\",\n", " \"Make huge profits without work, earn up to $100,000 a day\"]\n", "\n", "# Create embeddings index with content enabled. The default behavior is to only store indexed vectors.\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/nli-mpnet-base-v2\", \"content\": True})\n", "\n", "# Create an index for the list of text\n", "embeddings.index([(uid, text, None) for uid, text in enumerate(data)])\n", "\n", "# Run a search\n", "embeddings.search(\"feel good story\", 1)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': '4',\n", " 'score': 0.08329004049301147,\n", " 'text': 'Maine man wins $1M from $25 lottery ticket'}]" ] }, "metadata": {}, "execution_count": 14 } ] }, { "cell_type": "markdown", "source": [ "# Custom SQL functions\n", "\n", "Next, we'll recreate the index adding user-defined SQL functions. These functions are simply Python callable objects or functions that take an input and return values. Pipelines, workflows, custom tasks and any other callable objects are supported." 
], "metadata": { "id": "QTee7YMNDD4R" } }, { "cell_type": "code", "source": [ "def clength(text):\n", " return len(text) if text else 0\n", "\n", "# Create embeddings index with content enabled. The default behavior is to only store indexed vectors.\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/nli-mpnet-base-v2\", \"content\": True, \"functions\": [clength]})\n", "\n", "# Create an index for the list of text\n", "embeddings.index([(uid, text, None) for uid, text in enumerate(data)])\n", "\n", "# Run a search using a custom SQL function\n", "embeddings.search(\"select clength(text) clength, length(text) length, text from txtai where similar('feel good story')\", 1)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rbsEXtysDDNg", "outputId": "f966be17-086b-49b4-e1af-62b766f1c995" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'clength': 42,\n", " 'length': 42,\n", " 'text': 'Maine man wins $1M from $25 lottery ticket'}]" ] }, "metadata": {}, "execution_count": 15 } ] }, { "cell_type": "markdown", "source": [ "The function itself is simple, it's just an alternate length function. But this example is just warming us up to what is possible and what is more exciting. " ], "metadata": { "id": "epIV58P1DyZa" } }, { "cell_type": "markdown", "source": [ "# Pipelines in SQL\n", "\n", "As mentioned above, any callable can be registered as a custom SQL function. Let's add a translate SQL function." ], "metadata": { "id": "1Iw1WKR6FW3S" } }, { "cell_type": "code", "source": [ "from txtai.pipeline import Translation\n", "\n", "# Translation pipeline\n", "translate = Translation()\n", "\n", "# Create embeddings index with content enabled. 
The default behavior is to only store indexed vectors.\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/nli-mpnet-base-v2\", \"content\": True, \"functions\": [translate]})\n", "\n", "# Create an index for the list of text\n", "embeddings.index([(uid, text, None) for uid, text in enumerate(data)])\n", "\n", "query = \"\"\"\n", "select\n", " text,\n", " translation(text, 'de', null) 'text (DE)',\n", " translation(text, 'es', null) 'text (ES)',\n", " translation(text, 'fr', null) 'text (FR)'\n", "from txtai where similar('feel good story')\n", "limit 1\n", "\"\"\"\n", "\n", "# Run a search using a custom SQL function\n", "embeddings.search(query)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "83e8yXpXFh4F", "outputId": "0b17e9be-8983-418d-9903-b1e72efc5918" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'text': 'Maine man wins $1M from $25 lottery ticket',\n", " 'text (DE)': 'Maine Mann gewinnt $1M von $25 Lotterie-Ticket',\n", " 'text (ES)': 'Maine hombre gana $1M de billete de lotería de $25',\n", " 'text (FR)': 'Maine homme gagne $1M à partir de $25 billet de loterie'}]" ] }, "metadata": {}, "execution_count": 16 } ] }, { "cell_type": "markdown", "source": [ "And just like that we have translations through SQL! This is pretty 🔥🔥🔥\n", "\n", "We can do more to make this easier though. Let's define a helper function to not require as many parameters. The default logic will require all function parameters each call, including parameters with default values." ], "metadata": { "id": "Ck_XTyBEQtaW" } }, { "cell_type": "code", "source": [ "def translation(text, lang):\n", " return translate(text, lang)\n", "\n", "# Create embeddings index with content enabled. 
The default behavior is to only store indexed vectors.\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/nli-mpnet-base-v2\", \"content\": True, \"functions\": [translation]})\n", "\n", "# Create an index for the list of text\n", "embeddings.index([(uid, text, None) for uid, text in enumerate(data)])\n", "\n", "query = \"\"\"\n", "select\n", " text,\n", " translation(text, 'de') 'text (DE)',\n", " translation(text, 'es') 'text (ES)',\n", " translation(text, 'fr') 'text (FR)'\n", "from txtai where similar('feel good story')\n", "limit 1\n", "\"\"\"\n", "\n", "# Run a search using a custom SQL function\n", "embeddings.search(query)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "L2DDJrd0RAaN", "outputId": "0bb437ec-5c9b-4a0c-fe8a-07f641c94a49" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'text': 'Maine man wins $1M from $25 lottery ticket',\n", " 'text (DE)': 'Maine Mann gewinnt $1M von $25 Lotterie-Ticket',\n", " 'text (ES)': 'Maine hombre gana $1M de billete de lotería de $25',\n", " 'text (FR)': 'Maine homme gagne $1M à partir de $25 billet de loterie'}]" ] }, "metadata": {}, "execution_count": 17 } ] }, { "cell_type": "markdown", "source": [ "# Custom SQL functions with applications\n", "\n", "Of course this is all available with YAML-configured applications." 
], "metadata": { "id": "mTT8nopiRdVH" } }, { "cell_type": "code", "source": [ "config = \"\"\"\n", "translation:\n", "\n", "writable: true\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", " content: true\n", " functions:\n", " - {name: translation, argcount: 2, function: translation}\n", "\"\"\"\n", "\n", "from txtai.app import Application\n", "\n", "# Build application and index data\n", "app = Application(config)\n", "app.add([{\"id\": x, \"text\": row} for x, row in enumerate(data)])\n", "app.index()\n", "\n", "# Run search with custom SQL\n", "app.search(query)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FZ_7G6M4RUbz", "outputId": "4eca94f3-d2aa-4449-dc6f-f1091ad9dd67" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'text': 'Maine man wins $1M from $25 lottery ticket',\n", " 'text (DE)': 'Maine Mann gewinnt $1M von $25 Lotterie-Ticket',\n", " 'text (ES)': 'Maine hombre gana $1M de billete de lotería de $25',\n", " 'text (FR)': 'Maine homme gagne $1M à partir de $25 billet de loterie'}]" ] }, "metadata": {}, "execution_count": 18 } ] }, { "cell_type": "markdown", "metadata": { "id": "aDIF3tYt6X0O" }, "source": [ "# Wrapping up\n", "\n", "This notebook introduced running user-defined custom SQL functions through embeddings SQL. This powerful feature can be used with any callable function including pipelines, tasks and workflows in tandem with similarity and rules filters." 
] } ] } ================================================ FILE: examples/31_Near_duplicate_image_detection.ipynb ================================================ [File too large to display: 257.8 KB] ================================================ FILE: examples/32_Model_explainability.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "kernelspec": { "name": "python3", "display_name": "Python 3", "language": "python" }, "language_info": { "name": "python", "version": "3.7.6", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "colab": { "provenance": [] } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "POWZoSJR6XzK" }, "source": [ "# Model explainability\n", "\n", "Neural/transformers based approaches have recently made amazing advancements. But it is difficult to understand how models make decisions. This is especially important in sensitive areas where models are being used to drive critical decisions.\n", "\n", "This notebook will cover how to gain a level of understanding of complex natural language model outputs." ] }, { "cell_type": "markdown", "metadata": { "id": "qa_PPKVX6XzN" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ] }, { "cell_type": "code", "metadata": { "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "trusted": true, "_kg_hide-output": true, "id": "24q-1n5i6XzQ" }, "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline] shap" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Semantic Search\n", "\n", "The first example we'll cover is semantic search. 
Semantic search applications have an understanding of natural language and identify results that have the same meaning, not necessarily the same keywords. While this produces higher quality results, one advantage of keyword search is it's easy to understand why a result was selected. The keyword is there.\n", "\n", "Let's see if we can gain a better understanding of semantic search output. " ], "metadata": { "id": "snon4fqZbalQ" } }, { "cell_type": "code", "source": [ "from txtai.embeddings import Embeddings\n", "\n", "data = [\"US tops 5 million confirmed virus cases\",\n", " \"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\",\n", " \"Beijing mobilises invasion craft along coast as Taiwan tensions escalate\",\n", " \"The National Park Service warns against sacrificing slower friends in a bear attack\",\n", " \"Maine man wins $1M from $25 lottery ticket\",\n", " \"Make huge profits without work, earn up to $100,000 a day\"]\n", "\n", "# Create embeddings index with content enabled. 
The default behavior is to only store indexed vectors.\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/nli-mpnet-base-v2\", \"content\": True})\n", "\n", "# Create an index for the list of text\n", "embeddings.index([(uid, text, None) for uid, text in enumerate(data)])\n", "\n", "# Run a search\n", "embeddings.explain(\"feel good story\", limit=1)" ], "metadata": { "id": "MnRR8pTzK8h4", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "956e405c-568b-44bc-b8ea-d4b6f3cea9b5" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': '4',\n", " 'score': 0.08329004049301147,\n", " 'text': 'Maine man wins $1M from $25 lottery ticket',\n", " 'tokens': [('Maine', 0.003297939896583557),\n", " ('man', -0.03039500117301941),\n", " ('wins', 0.03406312316656113),\n", " ('$1M', -0.03121592104434967),\n", " ('from', -0.02270638197660446),\n", " ('$25', 0.012891143560409546),\n", " ('lottery', -0.015372440218925476),\n", " ('ticket', 0.007445111870765686)]}]" ] }, "metadata": {}, "execution_count": 29 } ] }, { "cell_type": "markdown", "source": [ "The `explain` method above ran an embeddings query like `search` but also analyzed each token to determine term importance. Looking at the results, it appears that `wins` is the most important term. Let's visualize it." ], "metadata": { "id": "ZEkXurdobRKL" } }, { "cell_type": "code", "source": [ "from IPython.display import HTML\n", "\n", "def plot(query):\n", " result = embeddings.explain(query, limit=1)[0]\n", "\n", " output = f\"{query}
\"\n", " spans = []\n", " for token, score in result[\"tokens\"]:\n", " color = None\n", " if score >= 0.1:\n", " color = \"#fdd835\"\n", " elif score >= 0.075:\n", " color = \"#ffeb3b\"\n", " elif score >= 0.05:\n", " color = \"#ffee58\"\n", " elif score >= 0.02:\n", " color = \"#fff59d\"\n", "\n", " spans.append((token, score, color))\n", "\n", " if result[\"score\"] >= 0.05 and not [color for _, _, color in spans if color]:\n", " mscore = max([score for _, score, _ in spans])\n", " spans = [(token, score, \"#fff59d\" if score == mscore else color) for token, score, color in spans]\n", "\n", " for token, _, color in spans:\n", " if color:\n", " output += f\"{token} \"\n", " else:\n", " output += f\"{token} \"\n", "\n", " return output\n", "\n", "HTML(plot(\"feel good story\"))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 52 }, "id": "klJYGOrypXUL", "outputId": "88959716-1dcc-4fee-d784-cb398aa87eb5" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" ], "text/html": [ "feel good story
Maine man wins $1M from $25 lottery ticket " ] }, "metadata": {}, "execution_count": 30 } ] }, { "cell_type": "markdown", "source": [ "Let's try some more queries!" ], "metadata": { "id": "CCHqZffacPVh" } }, { "cell_type": "code", "source": [ "output = \"\"\n", "for query in [\"feel good story\", \"climate change\", \"public health story\", \"war\", \"wildlife\", \"asia\", \"lucky\", \"dishonest junk\"]:\n", " output += plot(query) + \"

\"\n", "\n", "HTML(output)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 434 }, "id": "OOc8OfKfcpPL", "outputId": "75541eb6-f607-42b7-c2c8-43346fa7da8f" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" ], "text/html": [ "feel good story
Maine man wins $1M from $25 lottery ticket

climate change
Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg

public health story
US tops 5 million confirmed virus cases

war
Beijing mobilises invasion craft along coast as Taiwan tensions escalate

wildlife
The National Park Service warns against sacrificing slower friends in a bear attack

asia
Beijing mobilises invasion craft along coast as Taiwan tensions escalate

lucky
Maine man wins $1M from $25 lottery ticket

dishonest junk
Make huge profits without work, earn up to $100,000 a day

" ] }, "metadata": {}, "execution_count": 31 } ] }, { "cell_type": "markdown", "source": [ "There is also a batch method that can run bulk explanations more efficiently. " ], "metadata": { "id": "WOa54eJ-c9nc" } }, { "cell_type": "code", "source": [ "queries = [\"feel good story\", \"climate change\", \"public health story\", \"war\", \"wildlife\", \"asia\", \"lucky\", \"dishonest junk\"]\n", "results = embeddings.batchexplain(queries, limit=1)\n", "\n", "for x, result in enumerate(results):\n", " print(result)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "viuEwL4scQjx", "outputId": "cb65e678-b694-4fe6-eb94-dea821bda643" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[{'id': '4', 'text': 'Maine man wins $1M from $25 lottery ticket', 'score': 0.08329004049301147, 'tokens': [('Maine', 0.003297939896583557), ('man', -0.03039500117301941), ('wins', 0.03406312316656113), ('$1M', -0.03121592104434967), ('from', -0.02270638197660446), ('$25', 0.012891143560409546), ('lottery', -0.015372440218925476), ('ticket', 0.007445111870765686)]}]\n", "[{'id': '1', 'text': \"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\", 'score': 0.24478264153003693, 'tokens': [(\"Canada's\", -0.026454076170921326), ('last', 0.017057165503501892), ('fully', 0.007285907864570618), ('intact', -0.005608782172203064), ('ice', 0.009459629654884338), ('shelf', -0.029393181204795837), ('has', 0.0253918319940567), ('suddenly', 0.021642476320266724), ('collapsed,', -0.030680224299430847), ('forming', 0.01910528540611267), ('a', -0.00890059769153595), ('Manhattan-sized', -0.023612067103385925), ('iceberg', -0.009710296988487244)]}]\n", "[{'id': '0', 'text': 'US tops 5 million confirmed virus cases', 'score': 0.1701308637857437, 'tokens': [('US', -0.02426217496395111), ('tops', -0.04896041750907898), ('5', -0.040287598967552185), ('million', -0.04737819731235504), ('confirmed', 
0.02050541341304779), ('virus', 0.05511370301246643), ('cases', -0.029122650623321533)]}]\n", "[{'id': '2', 'text': 'Beijing mobilises invasion craft along coast as Taiwan tensions escalate', 'score': 0.2714069187641144, 'tokens': [('Beijing', -0.040329575538635254), ('mobilises', -0.01986941695213318), ('invasion', 0.06464864313602448), ('craft', 0.044328778982162476), ('along', 0.021214008331298828), ('coast', -0.01738378405570984), ('as', -0.02182626724243164), ('Taiwan', -0.020671993494033813), ('tensions', -0.007258296012878418), ('escalate', -0.01663634181022644)]}]\n", "[{'id': '3', 'text': 'The National Park Service warns against sacrificing slower friends in a bear attack', 'score': 0.28424495458602905, 'tokens': [('The', -0.022544533014297485), ('National', -0.005589812994003296), ('Park', 0.08145171403884888), ('Service', -0.016785144805908203), ('warns', -0.03266721963882446), ('against', -0.032368004322052), ('sacrificing', -0.04440906643867493), ('slower', 0.034766435623168945), ('friends', 0.0013159513473510742), ('in', -0.008420556783676147), ('a', 0.015498429536819458), ('bear', 0.08734165132045746), ('attack', -0.011731922626495361)]}]\n", "[{'id': '2', 'text': 'Beijing mobilises invasion craft along coast as Taiwan tensions escalate', 'score': 0.24338798224925995, 'tokens': [('Beijing', -0.032770439982414246), ('mobilises', -0.04045189917087555), ('invasion', -0.0015233010053634644), ('craft', 0.017402753233909607), ('along', 0.004210904240608215), ('coast', 0.0028585344552993774), ('as', -0.0018710196018218994), ('Taiwan', 0.01866382360458374), ('tensions', -0.011064544320106506), ('escalate', -0.029331132769584656)]}]\n", "[{'id': '4', 'text': 'Maine man wins $1M from $25 lottery ticket', 'score': 0.06539873033761978, 'tokens': [('Maine', 0.012625649571418762), ('man', -0.013015367090702057), ('wins', -0.022461198270320892), ('$1M', -0.041918568313121796), ('from', -0.02305116504430771), ('$25', -0.029282495379447937), ('lottery', 
0.02279689908027649), ('ticket', -0.009147539734840393)]}]\n", "[{'id': '5', 'text': 'Make huge profits without work, earn up to $100,000 a day', 'score': 0.033823199570178986, 'tokens': [('Make', 0.0013405345380306244), ('huge', 0.002276904881000519), ('profits', 0.02767787780612707), ('without', -0.007079385221004486), ('work,', -0.019851915538311005), ('earn', -0.026906955987215042), ('up', 0.00074811652302742), ('to', 0.007462538778781891), ('$100,000', -0.03565136343240738), ('a', -0.009965047240257263), ('day', -0.0021888017654418945)]}]\n" ] } ] }, { "cell_type": "markdown", "source": [ "Of course, this method is supported through YAML-based applications and the API." ], "metadata": { "id": "WWH6l9nOdD-l" } }, { "cell_type": "code", "source": [ "from txtai.app import Application\n", "\n", "app = Application(\"\"\"\n", "writable: true\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", " content: true\n", "\"\"\")\n", "\n", "app.add([{\"id\": uid, \"text\": text} for uid, text in enumerate(data)])\n", "app.index()\n", "\n", "app.explain(\"feel good story\", limit=1)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Nwrd3v6cdKR_", "outputId": "da949ae7-f681-4197-9df2-d5e2cb304f24" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': '4',\n", " 'score': 0.08329004049301147,\n", " 'text': 'Maine man wins $1M from $25 lottery ticket',\n", " 'tokens': [('Maine', 0.003297939896583557),\n", " ('man', -0.03039500117301941),\n", " ('wins', 0.03406312316656113),\n", " ('$1M', -0.03121592104434967),\n", " ('from', -0.02270638197660446),\n", " ('$25', 0.012891143560409546),\n", " ('lottery', -0.015372440218925476),\n", " ('ticket', 0.007445111870765686)]}]" ] }, "metadata": {}, "execution_count": 33 } ] }, { "cell_type": "markdown", "source": [ "# Pipeline models\n", "\n", "txtai pipelines are wrappers around Hugging Face pipelines with logic to easily integrate with 
txtai's workflow framework. Given that, we can use the [SHAP](https://github.com/slundberg/shap) library to explain predictions.\n", "\n", "Let's try a sentiment analysis example." ], "metadata": { "id": "RsRuLWYpdup_" } }, { "cell_type": "code", "source": [ "import shap\n", "\n", "from txtai.pipeline import Labels\n", "\n", "data = [\"Dodgers lose again, give up 3 HRs in a loss to the Giants\",\n", " \"Massive dunk!!! they are now up by 15 with 2 minutes to go\"]\n", "\n", "labels = Labels(dynamic=False)\n", "\n", "# explain the model on two sample inputs\n", "explainer = shap.Explainer(labels.pipeline) \n", "shap_values = explainer(data)" ], "metadata": { "id": "SnoPSv1-fxvF" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "shap.plots.text(shap_values[0, :, \"NEGATIVE\"])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 118 }, "id": "YW_qbzPKpRdL", "outputId": "c21b3558-feed-4854-9444-954f87aa8422" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "0.80.70.60.50.40.911.10.5677350.567735base value0.995090.99509fNEGATIVE(inputs)0.192 loss 0.143 lose 0.082 up 0.059 Dodgers 0.054 , 0.051 again 0.017 a 0.009 HR 0.007 s -0.108 Giants -0.021 give -0.02 the -0.018 3 -0.012 to -0.009 in -0.0
inputs
0.0
0.059
Dodgers
0.143
lose
0.051
again
0.054
,
-0.021
give
0.082
up
-0.018
3
0.009
HR
0.007
s
-0.009
in
0.017
a
0.192
loss
-0.012
to
-0.02
the
-0.108
Giants
-0.0
" ] }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "shap.plots.text(shap_values[1, :, \"NEGATIVE\"])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 118 }, "id": "SU2hHRjCpc_I", "outputId": "83e1ada5-de45-426a-ae64-dae5b66b8e6a" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "0.30-0.30.60.90.538360.53836base value00fNEGATIVE(inputs)0.199 Massive 0.171 minutes 0.045 ! 0.029 ! 0.025 by 0.024 ! 0.016 15 0.008 to 0.007 2 -0.392 k -0.184 up -0.123 go -0.121 dun -0.118 they -0.052 are -0.049 now -0.024 with
inputs
0.0
0.199
Massive
-0.121
dun
-0.392
k
0.045
!
0.024
!
0.029
!
-0.118
they
-0.052
are
-0.049
now
-0.184
up
0.025
by
0.016
15
-0.024
with
0.007
2
0.171
minutes
0.008
to
-0.123
go
0.0
" ] }, "metadata": {} } ] }, { "cell_type": "markdown", "source": [ "The [SHAP documentation](https://shap.readthedocs.io/en/latest/text_examples.html) provides a great list of additional examples for translation, text generation, summarization and question-answering.\n", "\n", "The SHAP library is pretty 🔥🔥🔥 Check it out for more!" ], "metadata": { "id": "8atTIz37iP9Q" } }, { "cell_type": "markdown", "source": [ "# Wrapping up\n", "\n", "This notebook briefly introduced model explainability. There is a lot of work in this area; expect a number of different methods to become available. Model explainability helps users gain a level of trust in model predictions. It also helps debug why a model is making a decision, which can potentially drive how to fine-tune a model to make better predictions. \n", "\n", "Keep an eye on this important area over the coming months!\n" ], "metadata": { "id": "YujdAMlGh0qT" } } ] } ================================================ FILE: examples/33_Query_translation.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "POWZoSJR6XzK" }, "source": [ "# Query translation\n", "\n", "txtai supports two main types of queries: natural language statements and SQL statements. Natural language queries handle search-engine-like queries. SQL statements enable more complex filtering, sorting and column selection. Query translation bridges the gap between the two and enables filtering for natural language queries.\n", "\n", "For example, the query:\n", "\n", "```\n", "Tell me a feel good story since yesterday\n", "```\n", "\n", "becomes\n", "\n", "```sql\n", "select * from txtai where similar(\"Tell me a feel good story\") and\n", "entry >= date('now', '-1 day')\n", "```" ] }, { "cell_type": "markdown", "metadata": { "id": "qa_PPKVX6XzN" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "_kg_hide-output": true, "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "id": "24q-1n5i6XzQ", "trusted": true }, "outputs": [], "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline]" ] }, { "cell_type": "markdown", "metadata": { "id": "0p3WCDniUths" }, "source": [ "# Create index\n", "Let's first recap how to create an index. We'll use the classic txtai example.\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a", "colab": { "base_uri": "https://localhost:8080/" }, "id": "2j_CFGDR6Xzp", "outputId": "e65238c5-d67f-4fe4-9cbf-c88b0ff3a8bb", "trusted": true }, "outputs": [ { "data": { "text/plain": [ "[{'id': '4',\n", " 'text': 'Maine man wins $1M from $25 lottery ticket',\n", " 'score': 0.08329025655984879}]" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from txtai import Embeddings\n", "\n", "data = [\"US tops 5 million confirmed virus cases\",\n", " \"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg\",\n", " \"Beijing mobilises invasion craft along coast as Taiwan tensions escalate\",\n", " \"The National Park Service warns against sacrificing slower friends in a bear attack\",\n", " \"Maine man wins $1M from $25 lottery ticket\",\n", " \"Make huge profits without work, earn up to $100,000 a day\"]\n", "\n", "# Create embeddings index with content enabled. 
The default behavior is to only store indexed vectors.\n", "embeddings = Embeddings(path=\"sentence-transformers/nli-mpnet-base-v2\", content=True)\n", "\n", "# Create an index for the list of text\n", "embeddings.index(data)\n", "\n", "# Run a search\n", "embeddings.search(\"feel good story\", 1)" ] }, { "cell_type": "markdown", "metadata": { "id": "QTee7YMNDD4R" }, "source": [ "# Query translation models\n", "\n", "Next we'll explore how query translation models work with examples. " ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "04iOgP_ojSfK", "outputId": "41e73130-75e6-4ae9-b690-685972a13565" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Input: feel good story\n", "SQL: select id, text, score from txtai where similar('feel good story')\n", "\n", "Input: feel good story since yesterday\n", "SQL: select id, text, score from txtai where similar('feel good story') and entry >= date('now', '-1 day')\n", "\n", "Input: feel good story with 'lottery' in text\n", "SQL: select id, text, score from txtai where similar('feel good story') and text like '%lottery%'\n", "\n", "Input: how many feel good story\n", "SQL: select count(*) from txtai where similar('feel good story')\n", "\n", "Input: feel good story translated to fr\n", "SQL: select id, translate(text, 'fr') text, score from txtai where similar('feel good story')\n", "\n", "Input: feel good story summarized\n", "SQL: select id, summary(text) text, score from txtai where similar('feel good story')\n", "\n" ] } ], "source": [ "from txtai import LLM\n", "\n", "llm = LLM(\"NeuML/t5-small-txtsql\", template=\"translate English to SQL: {text}\")\n", "\n", "queries = [\n", " \"feel good story\",\n", " \"feel good story since yesterday\",\n", " \"feel good story with 'lottery' in text\",\n", " \"how many feel good story\",\n", " \"feel good story translated to fr\",\n", " \"feel good story summarized\"\n", "]\n", "\n", 
"for query in queries:\n", " print(f\"Input: {query}\")\n", " print(f\"SQL: {llm(query)}\")\n", " print()\n" ] }, { "cell_type": "markdown", "metadata": { "id": "rAnEMaiWlOXm" }, "source": [ "Looking at the query translations above gives an idea on how this model works.\n", "\n", "[t5-small-txtsql](https://huggingface.co/NeuML/t5-small-txtsql) is the default model. Custom domain query syntax languages can be created using this same methodology, including for other languages. Natural language can be translated to functions, query clauses, column selection and more!" ] }, { "cell_type": "markdown", "metadata": { "id": "P9hOcgNfjSyL" }, "source": [ "# Natural language filtering\n", "\n", "Now it's time for this in action! Let's first initialize the embeddings index with the appropriate settings." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "L2DDJrd0RAaN", "outputId": "c60bfc29-187b-4926-8656-04063b2ca85c" }, "outputs": [ { "data": { "text/plain": [ "{'id': '4',\n", " 'score': 0.08329025655984879,\n", " 'text': 'Maine Mann gewinnt $1M von $25 Lotterie-Ticket'}" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from txtai.pipeline import Translation\n", "\n", "def translate(text, lang):\n", " return translation(text, lang)\n", "\n", "translation = Translation()\n", "\n", "# Create embeddings index with content enabled. 
The default behavior is to only store indexed vectors.\n", "embeddings = Embeddings(\n", " path=\"sentence-transformers/nli-mpnet-base-v2\",\n", " content=True,\n", " query={\"path\": \"NeuML/t5-small-txtsql\"},\n", " functions=[translate]\n", ")\n", "\n", "# Create an index for the list of text\n", "embeddings.index(data)\n", "\n", "query = \"select id, score, translate(text, 'de') 'text' from txtai where similar('feel good story')\"\n", "\n", "# Run a search using a custom SQL function\n", "embeddings.search(query)[0]" ] }, { "cell_type": "markdown", "metadata": { "id": "MNd7QmFmnh-f" }, "source": [ "Note how the query model was provided as an embeddings index configuration parameter. Custom SQL functions were also added in. Let's now see if the same SQL statement can be run with a natural language query." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "mnGSBNMonur2", "outputId": "dc14d898-b46f-42e6-88e0-434863cc15d6" }, "outputs": [ { "data": { "text/plain": [ "{'id': '4',\n", " 'text': 'Maine Mann gewinnt $1M von $25 Lotterie-Ticket',\n", " 'score': 0.08329025655984879}" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embeddings.search(\"feel good story translated to de\")[0]" ] }, { "cell_type": "markdown", "metadata": { "id": "OnmVUK2DoZkh" }, "source": [ "Same result. Let's try a few more." 
] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Js2b_M61oitg", "outputId": "fab080cb-d082-4cf3-8314-12d23acc3c02" }, "outputs": [ { "data": { "text/plain": [ "{'id': '4',\n", " 'text': 'Maine man wins $1M from $25 lottery ticket',\n", " 'score': 0.08329025655984879}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embeddings.search(\"feel good story since yesterday\")[0]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pcxPgriwomWv", "outputId": "7e71dd09-7c4f-4147-ba9e-47a45c1cf90f" }, "outputs": [ { "data": { "text/plain": [ "[{'id': '4',\n", " 'text': 'Maine man wins $1M from $25 lottery ticket',\n", " 'score': 0.08329025655984879}]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embeddings.search(\"feel good story with 'lottery' in text\")" ] }, { "cell_type": "markdown", "metadata": { "id": "1ob4Q_CLpGBm" }, "source": [ "For good measure, a couple queries with filters that return no results." 
] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "LMaf7JJzonAC", "outputId": "71080bd1-2d9e-41ba-9501-2cdedaf26d6d" }, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embeddings.search(\"feel good story with missing in text\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ADFNLhvto_0J", "outputId": "12e08ea8-7203-4fb0-cb84-1133b5db9dc0" }, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embeddings.search(\"feel good story with field equal 14\")" ] }, { "cell_type": "markdown", "metadata": { "id": "mTT8nopiRdVH" }, "source": [ "# Query translation with applications\n", "\n", "Of course this is all available with YAML-configured applications." ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FZ_7G6M4RUbz", "outputId": "b00ce1c2-8df2-4289-8d03-991174a0e74f" }, "outputs": [ { "data": { "text/plain": [ "{'id': '4',\n", " 'text': 'Maine Mann gewinnt $1M von $25 Lotterie-Ticket',\n", " 'score': 0.08329025655984879}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "config = \"\"\"\n", "translation:\n", "\n", "writable: true\n", "embeddings:\n", " path: sentence-transformers/nli-mpnet-base-v2\n", " content: true\n", " query:\n", " path: NeuML/t5-small-txtsql\n", " functions:\n", " - {name: translate, argcount: 2, function: translation}\n", "\"\"\"\n", "\n", "from txtai.app import Application\n", "\n", "# Build application and index data\n", "app = Application(config)\n", "app.add([{\"id\": x, \"text\": row} for x, row in enumerate(data)])\n", "app.index()\n", "\n", "# Run search query\n", "app.search(\"feel good story translated to 
de\")[0]" ] }, { "cell_type": "markdown", "metadata": { "id": "aDIF3tYt6X0O" }, "source": [ "# Wrapping up\n", "\n", "This notebook introduced natural language filtering with query translation models. This powerful feature adds filtering and pipelines to natural language statements. Custom domain-specific query languages can be created to enable rich queries natively in txtai." ] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "local", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.19" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: examples/34_Build_a_QA_database.ipynb ================================================ [File too large to display: 13.5 KB] ================================================ FILE: examples/35_Pictures_are_worth_a_thousand_words.ipynb ================================================ [File too large to display: 5.0 MB] ================================================ FILE: examples/36_Run_txtai_in_native_code.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "gpuClass": "standard", "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "source": [ "# Run txtai in native code\n", "\n", "txtai currently has two main methods of execution: Python or via a HTTP API. 
There are API bindings for [JavaScript](https://github.com/neuml/txtai.js), [Java](https://github.com/neuml/txtai.java), [Rust](https://github.com/neuml/txtai.rs) and [Go](https://github.com/neuml/txtai.go).\n", "\n", "This notebook presents a way to run txtai as part of a native executable with the [Python C API](https://docs.python.org/3/c-api/index.html). We'll run an example in C and even call txtai from assembly code!\n", "\n", "Before diving into this notebook, it's important to emphasize that connecting to txtai via the HTTP API has a number of major advantages. This includes decoupling from Python, the ability to offload txtai to a different machine and scaling with cloud compute. With that being said, this notebook demonstrates an additional way to integrate txtai along with providing an informative and perhaps academic programming exercise." ], "metadata": { "id": "-xU9P9iSR-Cy" } }, { "cell_type": "markdown", "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ], "metadata": { "id": "shlUi2kKS7KT" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "xEvX9vCpn4E0" }, "outputs": [], "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline] sacremoses\n", "\n", "# Remove tensorflow as it's not used and prints noisy log messages\n", "!pip uninstall -y tensorflow\n", "\n", "# Install python3.7-dev and nasm\n", "!apt-get install python3.7-dev nasm" ] }, { "cell_type": "markdown", "source": [ "# Workflow configuration\n", "\n", "This configuration builds a workflow to translate input text to French. 
More information on workflows can be found in [txtai's documentation](https://neuml.github.io/txtai/workflow).\n" ], "metadata": { "id": "AtEdP7Utw3mk" } }, { "cell_type": "code", "source": [ "%%writefile config.yml\n", "summary:\n", " path: sshleifer/distilbart-cnn-12-6\n", "\n", "textractor:\n", " join: true\n", " lines: false\n", " minlength: 100\n", " paragraphs: true\n", " sentences: false\n", " tika: false\n", "\n", "translation:\n", "\n", "workflow:\n", " summary:\n", " tasks:\n", " - action: textractor\n", " task: url\n", " - action: summary\n", "\n", " translate:\n", " tasks:\n", " - action: translation\n", " args: \n", " - fr" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "DPWrubv5oOn7", "outputId": "bdfba131-c36e-421d-8c17-a726be4615d2" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing config.yml\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Python C API\n", "\n", "Next we'll build an interface to txtai workflows with the Python C API. This logic will load Python, create a txtai application instance and add methods to run workflows.\n", "\n", "Some assumptions are made:\n", "\n", "- txtai is installed and available\n", "- A workflow is available in a file named `config.yml`\n", "- The workflow only returns the first element\n", "\n", "These assumptions are for brevity. This example could be expanded on and built into a more robust, full-fledged library.\n", "\n", "While this example is in C, Rust has a well-maintained and popular library for interfacing with Python, [PyO3](https://github.com/PyO3/pyo3). Interfacing with the Python C API is also possible in Java, JavaScript and Go but not as straightforward." 
], "metadata": { "id": "KIg_QDVRo-6-" } }, { "cell_type": "code", "source": [ "%%writefile workflow.c\n", "#include \n", "\n", "// Global instances\n", "PyObject *module = NULL, *app = NULL;\n", "\n", "/**\n", " * Create txtai module.\n", " */\n", "PyObject* txtai() {\n", " PyObject* module = NULL;\n", " module = PyImport_ImportModule(\"txtai.app\");\n", " return module;\n", "}\n", "\n", "/**\n", " * Create txtai application instance.\n", " */\n", "PyObject* application() {\n", " PyObject* app = NULL;\n", " app = PyObject_CallMethod(module, \"Application\", \"z\", \"config.yml\");\n", " return app;\n", "}\n", "\n", "/**\n", " * Run txtai workflow.\n", " */\n", "PyObject* run(char** args) {\n", " PyObject* result = NULL;\n", " result = PyObject_CallMethod(app, \"workflow\", \"z[z]\", args[0], args[1]);\n", " return result;\n", "}\n", "\n", "/**\n", " * Cleanup Python objects.\n", " */\n", "void cleanup() {\n", " // Ensure Python instance exists\n", " if (Py_IsInitialized()) {\n", " PyErr_Print();\n", "\n", " Py_CLEAR(app);\n", " Py_CLEAR(module);\n", "\n", " Py_FinalizeEx();\n", " }\n", "}\n", "\n", "/**\n", " * Initialize a txtai application and run a workflow.\n", " */\n", "const char* workflow(char** args) {\n", " PyObject* result = NULL;\n", "\n", " // Create application instance if it doesn't already exist\n", " if (!Py_IsInitialized()) {\n", " // Start Python Interpreter\n", " Py_Initialize();\n", "\n", " // Create txtai module\n", " module = txtai();\n", "\n", " // Handle errors\n", " if (!module) {\n", " cleanup();\n", " return NULL;\n", " }\n", "\n", " // Create txtai application\n", " app = application();\n", "\n", " // Handle errors\n", " if (!app) {\n", " cleanup();\n", " return NULL;\n", " }\n", " }\n", "\n", " // Run workflow\n", " result = run(args);\n", "\n", " // Handle errors\n", " if (!result) {\n", " cleanup();\n", " return NULL;\n", " }\n", "\n", " // Get first result\n", " const char *text = PyUnicode_AsUTF8(PyIter_Next(result));\n", "\n", " // 
Cleanup result\n", " Py_CLEAR(result);\n", "\n", " return text;\n", "}" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rv64SWDkpmIx", "outputId": "1c19ae3c-7f06-43ac-8638-37ee7b5e4eab" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing workflow.c\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Run txtai workflow in C\n", "\n", "Let's now write a C program to run a workflow using command line arguments as input. " ], "metadata": { "id": "K19FZK5StVbf" } }, { "cell_type": "code", "source": [ "%%writefile main.c\n", "#include \n", "\n", "extern char* workflow(char** argv);\n", "extern void cleanup();\n", "\n", "/**\n", " * Run a txtai workflow and print results.\n", " */\n", "int main(int argc, char** argv) {\n", " if (argc < 3) {\n", " printf(\"Usage: workflow \\n\");\n", " return 1;\n", " }\n", "\n", " // Run workflow using command line arguments\n", " char* text = workflow(argv + 1);\n", " if (text) {\n", " printf(\"%s\\n\", text);\n", " }\n", "\n", " // Cleanup\n", " cleanup();\n", "\n", " return 0;\n", "}" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "apBnMta8pepD", "outputId": "e6573cec-6b92-424c-e594-aa46c7cd18b6" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing main.c\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Compile and run\n", "\n", "Time to compile this all into an executable and run!" 
], "metadata": { "id": "5kVxaL0FuOgu" } }, { "cell_type": "code", "source": [ "!cc -c main.c -I/usr/include/python3.7m\n", "!cc -c workflow.c -I/usr/include/python3.7m\n", "!cc -o workflow workflow.o main.o -lpython3.7m" ], "metadata": { "id": "KjQYctCqp1Vi" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "!./workflow translate \"I'm running machine translation using a transformers model in C!\"" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "t5IWOF8ap-a7", "outputId": "071f3d26-955e-4e10-cce4-5ed9774960d5" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "J'exécute la traduction automatique à l'aide d'un modèle de transformateurs en C!\n" ] } ] }, { "cell_type": "markdown", "source": [ "And there it is, a translation workflow from English to French in a native executable, all backed by Transformers models. Any workflow YAML can be loaded and run in C using this method, which is pretty powerful.\n", "\n", "Embedding txtai in a native executable adds libpython as a dependency (libraries from 3rd party modules such as PyTorch and NumPy also load dynamically). See the output of `ldd` below.\n", "This opens up an avenue to embed txtai in native code provided it is acceptable to add libpython as a project dependency. \n", "\n", "As mentioned above, connecting to a txtai HTTP API instance is a less tightly coupled way to accomplish the same thing." 
], "metadata": { "id": "4eNk20mCxp3r" } }, { "cell_type": "code", "source": [ "!ldd workflow | grep python" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SLq5ix5mGPAb", "outputId": "a46720e6-c721-4df5-92a7-a1bd897511d0" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\tlibpython3.7m.so.1.0 => /usr/lib/x86_64-linux-gnu/libpython3.7m.so.1.0 (0x00007efcba85e000)\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Machine learning in Assembly?\n", "\n", "Now for a more academic exercise perhaps bringing you back to a computer organization/logic class from college. Let's see if we can run the same program in assembly!" ], "metadata": { "id": "n6E-yHIjy0UY" } }, { "cell_type": "code", "source": [ "%%writefile main.asm\n", "global main\n", "\n", "; External C library functions\n", "extern puts\n", "\n", "; External txtai functions\n", "extern workflow, cleanup\n", "\n", "; Default to REL mode\n", "default REL\n", "\n", "section .data\n", " message: db \"Usage: workflow \", 0\n", "\n", "section .text\n", "\n", "; Print a usage message\n", "usage:\n", " mov rdi, message\n", " call puts\n", " jmp done\n", "\n", "; Main function\n", "main:\n", " ; Enter\n", " sub rsp, 8\n", "\n", " ; Read argc - require workflow name and element (plus program name)\n", " cmp rdi, 3\n", " jl usage\n", "\n", " ; Run txtai workflow with argv params (skip program name) and print result\n", " lea rdi, [rsi + 8]\n", " call workflow\n", " mov rdi, rax\n", " call puts\n", "\n", "done:\n", " ; Close txtai application instance\n", " call cleanup\n", "\n", " ; Exit\n", " add rsp, 8\n", " ret" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "osrjXJonqcTm", "outputId": "b2d662d0-2a71-4b53-a956-c9277c4a7d2b" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing main.asm\n" ] } ] }, { "cell_type": "code", "source": [ "# Build workflow executable\n", 
"!nasm -felf64 main.asm\n", "!cc -c workflow.c -I/usr/include/python3.7m\n", "!cc -o workflow -no-pie workflow.o main.o -lpython3.7m" ], "metadata": { "id": "sQMEx0LFq80Z" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "!./workflow translate \"I'm running machine translation using a transformers model with assembler!\"" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YG6faU7HrCrH", "outputId": "a7a77cbf-5806-4241-cd67-03e79438e6f2" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "J'exécute la traduction automatique à l'aide d'un modèle de transformateurs avec assembleur!\n" ] } ] }, { "cell_type": "markdown", "source": [ "Just as before, the input text is translated to French using a machine translation model. But this time the code executing the logic was in assembly!\n", "\n", "Probably not terribly useful but using the lowest level of code possible proves that any higher-level native code can do the same. " ], "metadata": { "id": "SNXAqFm6bNSw" } }, { "cell_type": "markdown", "source": [ "# Multiple workflow calls\n", "\n", "Everything up to this point has been a single workflow call. Much of the run time is spent on loading models as part of the txtai workflow. The next example will run a series of workflow calls and compare how long it takes vs a single workflow command line call. Once again in assembly." 
], "metadata": { "id": "dB_5UWoDH0nY" } }, { "cell_type": "code", "source": [ "%%writefile main.asm\n", "global main\n", "\n", "; External C library functions\n", "extern printf\n", "\n", "; External txtai functions\n", "extern workflow, cleanup\n", "\n", "; Default to REL mode\n", "default REL\n", "\n", "section .data\n", " format: db \"action: %s\", 10, \"input: %s\", 10, \"output: %s\", 10, 10, 0\n", " summary: db \"summary\", 0\n", " translate: db \"translate\", 0\n", " text1: db \"txtai executes machine-learning workflows to transform data and build AI-powered semantic search applications.\", 0\n", " text2: db \"Traditional search systems use keywords to find data\", 0\n", " url1: db \"https://github.com/neuml/txtai\", 0\n", " url2: db \"https://github.com/neuml/paperai\", 0\n", "\n", "section .text\n", "\n", "; Run txtai workflow and print results\n", "%macro txtai 2\n", " ; Workflow name and element\n", " push %2\n", " push %1\n", "\n", " ; Run workflow\n", " lea rdi, [rsp]\n", " call workflow\n", "\n", " ; Print action-input-output\n", " mov rdi, format\n", " mov rsi, [rsp]\n", " mov rdx, [rsp + 8]\n", " mov rcx, rax\n", " call printf\n", "\n", " ; Restore stack\n", " add rsp, 16\n", "%endmacro\n", "\n", "; Main function\n", "main:\n", " ; Enter\n", " sub rsp, 8\n", "\n", " ; Run workflows\n", " txtai translate, text1\t\n", " txtai translate, text2\n", " txtai summary, url1\n", " txtai summary, url2\n", "\n", "done:\n", " ; Close txtai application instance\n", " call cleanup\n", "\n", " ; Exit\n", " add rsp, 8\n", " ret" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wzTYCSaqCSZu", "outputId": "1f48015f-7654-4d86-9f2a-2e462c22f4e6" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Overwriting main.asm\n" ] } ] }, { "cell_type": "code", "source": [ "!time ./workflow translate \"I'm running machine translation using a transformers model with assembler!\"" ], "metadata": { "colab": { 
"base_uri": "https://localhost:8080/" }, "id": "BSmdBuPhIIbw", "outputId": "b45c1ea5-e0fb-4331-f033-5370cdc2ead0" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "J'exécute la traduction automatique à l'aide d'un modèle de transformateurs avec assembleur!\n", "\n", "real\t0m19.208s\n", "user\t0m11.256s\n", "sys\t0m3.224s\n" ] } ] }, { "cell_type": "code", "source": [ "# Build workflow executable\n", "!nasm -felf64 main.asm\n", "!cc -c workflow.c -I/usr/include/python3.7m\n", "!cc -no-pie -o workflow workflow.o main.o -lpython3.7m" ], "metadata": { "id": "HoRsPeorC5rC" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "!time ./workflow" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7Da6TZlDC8Mj", "outputId": "e2b46994-62d1-42d1-ab77-0d690a7d7d67" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "action: translate\n", "input: txtai executes machine-learning workflows to transform data and build AI-powered semantic search applications.\n", "output: txtai exécute des workflows d'apprentissage automatique pour transformer les données et construire des applications de recherche sémantique alimentées par l'IA.\n", "\n", "action: translate\n", "input: Traditional search systems use keywords to find data\n", "output: Les systèmes de recherche traditionnels utilisent des mots-clés pour trouver des données\n", "\n", "action: summary\n", "input: https://github.com/neuml/txtai\n", "output: txtai executes machine-learning workflows to transform data and build AI-powered semantic search applications. Semantic search applications have an understanding of natural language and identify results that have the same meaning, not necessarily the same keywords. API bindings for JavaScript, Java, Rust and Go. Cloud-native architecture scales out with container orchestration systems (e. g. 
Kubernetes)\n", "\n", "action: summary\n", "input: https://github.com/neuml/paperai\n", "output: paperai is an AI-powered literature discovery and review engine for medical/scientific papers. Paperai was used to analyze the COVID-19 Open Research Dataset (CORD-19) paperai and NeuML have been recognized in the following articles: Cord-19 Kaggle Challenge Awards Machine-Learning Experts Delve Into 47,000 Papers on Coronavirus Family.\n", "\n", "\n", "real\t0m22.478s\n", "user\t0m13.776s\n", "sys\t0m3.218s\n" ] } ] }, { "cell_type": "markdown", "source": [ "As we can see, running 4 workflow actions is about the same runtime as a single action when accounting for model load times." ], "metadata": { "id": "zNalTEg5cmhX" } }, { "cell_type": "markdown", "source": [ "# Wrapping up\n", "\n", "This notebook walked through an example on how to run txtai with native code. While the HTTP API is a better route to go, this is another way to work with txtai!" ], "metadata": { "id": "4L8smyyXc8q8" } } ] } ================================================ FILE: examples/37_Embeddings_index_components.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "gpuClass": "standard", "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "source": [ "# Embeddings index components\n", "\n", "The main components of txtai are `embeddings`, `pipeline`, `workflow` and an `api`. The following shows the top level view of the txtai src tree.\n", "\n", "```\n", "Abbreviated listing of src/txtai\n", " ann\n", " api\n", " database\n", " embeddings\n", " pipeline\n", " scoring\n", " vectors\n", " workflow\n", "```\n", "\n", "One might ask, why are `ann`, `database`, `scoring` and `vectors` top level packages and not under the `embeddings` package? 
The `embeddings` package provides the glue between these components, making everything easy to use. The reason is that each of these packages are modular and can be used on their own! \n", "\n", "This notebook will go through a series of examples demonstrating how these components can be used standalone as well as combined together to build custom search indexes.\n", "\n", "_Note: This is intended as a deep dive into txtai `embeddings` components. There are much simpler high-level APIs for standard use cases._" ], "metadata": { "id": "-xU9P9iSR-Cy" } }, { "cell_type": "markdown", "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ], "metadata": { "id": "shlUi2kKS7KT" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "xEvX9vCpn4E0" }, "outputs": [], "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai datasets" ] }, { "cell_type": "markdown", "source": [ "# Load dataset\n", "\n", "This example will use the `ag_news` dataset, which is a collection of news article headlines." ], "metadata": { "id": "408IyXzKFSiG" } }, { "cell_type": "code", "source": [ "from datasets import load_dataset\n", "\n", "dataset = load_dataset(\"ag_news\", split=\"train\")" ], "metadata": { "id": "IQ_ns6YvFRm1" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Approximate nearest neighbor (ANN) and Vectors\n", "\n", "In this section, we'll use the `ann` and `vectors` package to build a similarity index over the `ag_news` dataset.\n", "\n", "The first step is vectorizing the text. We'll use a `sentence-transformers` model. 
" ], "metadata": { "id": "AtEdP7Utw3mk" } }, { "cell_type": "code", "source": [ "import numpy as np\n", "\n", "from txtai.vectors import VectorsFactory\n", "\n", "model = VectorsFactory.create({\"path\": \"sentence-transformers/all-MiniLM-L6-v2\"}, None)\n", "\n", "embeddings = []\n", "\n", "# List of all text elements\n", "texts = dataset[\"text\"]\n", "\n", "# Create embeddings buffer, vector model has 384 features\n", "embeddings = np.zeros(dtype=np.float32, shape=(len(texts), 384))\n", "\n", "# Vectorize text in batches\n", "batch, index, batchsize = [], 0, 128\n", "for text in texts:\n", " batch.append(text)\n", "\n", " if len(batch) == batchsize:\n", " vectors = model.encode(batch)\n", " embeddings[index : index + vectors.shape[0]] = vectors\n", " index += vectors.shape[0]\n", " batch = []\n", "\n", "# Last batch\n", "if batch:\n", " vectors = model.encode(batch)\n", " embeddings[index : index + vectors.shape[0]] = vectors\n", "\n", "# Normalize embeddings\n", "embeddings /= np.linalg.norm(embeddings, axis=1)[:, np.newaxis]\n", "\n", "# Print shape\n", "embeddings.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "DPWrubv5oOn7", "outputId": "972c1837-2404-42f4-9a5e-51f3c3f149ee" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(120000, 384)" ] }, "metadata": {}, "execution_count": 3 } ] }, { "cell_type": "markdown", "source": [ "Next we'll build a vector index using these embeddings!" 
], "metadata": { "id": "SDaDLMyXLGe1" } }, { "cell_type": "code", "source": [ "from txtai.ann import ANNFactory\n", "\n", "# Create Faiss index using normalized embeddings\n", "ann = ANNFactory.create({\"backend\": \"faiss\"})\n", "ann.index(embeddings)\n", "\n", "# Show total\n", "ann.count()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ILSfWHxVHex0", "outputId": "b9da6a79-778f-4338-a6b5-d693772fcdae" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "120000" ] }, "metadata": {}, "execution_count": 4 } ] }, { "cell_type": "markdown", "source": [ "Now let's run a search." ], "metadata": { "id": "B_XnpIpXNKSP" } }, { "cell_type": "code", "source": [ "query = model.encode([\"best planets to explore for life\"])\n", "query /= np.linalg.norm(query)\n", "\n", "for uid, score in ann.search(query, 3)[0]:\n", " print(uid, texts[uid], score)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "c2FVQlxSLKgP", "outputId": "825ca5de-d765-4c67-fdde-c7fe06557095" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "17752 Rocky Road: Planet hunting gets closer to Earth Astronomers have discovered the three lightest planets known outside the solar system, moving researchers closer to the goal of finding extrasolar planets that resemble Earth. 0.599043607711792\n", "16158 Earth #39;s #39;big brothers #39; floating around stars Washington - A new class of planets has been found orbiting stars besides our sun, in a possible giant leap forward in the search for Earth-like planets that might harbour life. 0.5688529014587402\n", "45029 Coming Soon: \"Good\" Jupiters Most of the extrasolar planets discovered to date are gas giants like Jupiter, but their orbits are either much closer to their parent stars or are highly eccentric. Planet hunters are on the verge of confirming the discovery of Jupiter-size planets with Jupiter-like orbits. 
Solar systems that contain these \"good\" Jupiters may harbor habitable Earth-like planets as well. 0.5606889724731445\n" ] } ] }, { "cell_type": "markdown", "source": [ "And there it is, a full vector search system without using the `embeddings` package.\n", "\n", "Just as a reminder, the following much simpler code does the same thing with an Embeddings instance." ], "metadata": { "id": "00dnum6fNNM0" } }, { "cell_type": "code", "source": [ "from txtai.embeddings import Embeddings\n", "\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/all-MiniLM-L6-v2\"})\n", "embeddings.index((x, text, None) for x, text in enumerate(texts))\n", "\n", "for uid, score in embeddings.search(\"best planets to explore for life\"):\n", " print(uid, texts[uid], score)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "OYAqPoTmNaNN", "outputId": "30b6305a-11da-4439-e0f5-646aef8d96f6" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "17752 Rocky Road: Planet hunting gets closer to Earth Astronomers have discovered the three lightest planets known outside the solar system, moving researchers closer to the goal of finding extrasolar planets that resemble Earth. 0.599043607711792\n", "16158 Earth #39;s #39;big brothers #39; floating around stars Washington - A new class of planets has been found orbiting stars besides our sun, in a possible giant leap forward in the search for Earth-like planets that might harbour life. 0.568852961063385\n", "45029 Coming Soon: \"Good\" Jupiters Most of the extrasolar planets discovered to date are gas giants like Jupiter, but their orbits are either much closer to their parent stars or are highly eccentric. Planet hunters are on the verge of confirming the discovery of Jupiter-size planets with Jupiter-like orbits. Solar systems that contain these \"good\" Jupiters may harbor habitable Earth-like planets as well. 
0.560688853263855\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Database\n", "\n", "When the `content` parameter is enabled, an Embeddings instance stores both vector content and raw content in a database. But the `database` package can be used standalone too." ], "metadata": { "id": "KfGc7iNRO0Tw" } }, { "cell_type": "code", "source": [ "from txtai.database import DatabaseFactory\n", "\n", "# Load content into database\n", "database = DatabaseFactory.create({\"content\": True})\n", "database.insert((x, row, None) for x, row in enumerate(dataset))\n", "\n", "# Show total\n", "database.search(\"select count(*) from txtai\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "9IwcIgocPSDr", "outputId": "eeceee2f-cf35-414f-b9e4-ad975c118e42" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'count(*)': 120000}]" ] }, "metadata": {}, "execution_count": 7 } ] }, { "cell_type": "markdown", "source": [ "The full txtai [SQL query syntax](https://neuml.github.io/txtai/embeddings/query/#sql) is available, including working with dynamically created fields." ], "metadata": { "id": "l18dapNgSqnA" } }, { "cell_type": "code", "source": [ "database.search(\"select count(*), label from txtai group by label\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7yPjcT3peDnZ", "outputId": "241c98df-9c24-47a1-8ca5-e5bace19abfb" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'count(*)': 30000, 'label': 0},\n", " {'count(*)': 30000, 'label': 1},\n", " {'count(*)': 30000, 'label': 2},\n", " {'count(*)': 30000, 'label': 3}]" ] }, "metadata": {}, "execution_count": 8 } ] }, { "cell_type": "markdown", "source": [ "Let's run a query to find text containing the word planets." 
], "metadata": { "id": "S8G09Ib0kRB9" } }, { "cell_type": "code", "source": [ "for row in database.search(\"select id, text from txtai where text like '%planets%' limit 3\"):\n", " print(row[\"id\"], row[\"text\"])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Aq1ZvOhvQHRO", "outputId": "3c2f9c94-57b1-489d-c854-60b909187ff0" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "100 Comets, Asteroids and Planets around a Nearby Star (SPACE.com) SPACE.com - A nearby star thought to harbor comets and asteroids now appears to be home to planets, too. The presumed worlds are smaller than Jupiter and could be as tiny as Pluto, new observations suggest.\n", "102 Redesigning Rockets: NASA Space Propulsion Finds a New Home (SPACE.com) SPACE.com - While the exploration of the Moon and other planets in our solar system is nbsp;exciting, the first task for astronauts and robots alike is to actually nbsp;get to those destinations.\n", "272 Sharpest Image Ever Obtained of a Circumstellar Disk Reveals Signs of Young Planets MAUNA KEA, Hawaii -- The sharpest image ever taken of a dust disk around another star has revealed structures in the disk which are signs of unseen planets. Dr...\n" ] } ] }, { "cell_type": "markdown", "source": [ "Since this is just a SQL database, text search is quite limited. The query above just retrieved results with the word planets in it." ], "metadata": { "id": "xhND31tnkOY2" } }, { "cell_type": "markdown", "source": [ "# Scoring\n", "\n", "Since the original txtai release, there has been a `scoring` package. The main use case for this package is building a weighted sentence embeddings vector when using word vector models. But this package can also be used standalone to build BM25, TF-IDF and/or SIF text indexes." 
], "metadata": { "id": "-vNVSA2FQnKj" } }, { "cell_type": "code", "source": [ "from txtai.scoring import ScoringFactory\n", "\n", "# Build index\n", "scoring = ScoringFactory.create({\"method\": \"bm25\", \"terms\": True, \"content\": True})\n", "scoring.index((x, text, None) for x, text in enumerate(texts))\n", "\n", "# Show total\n", "scoring.count()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "LJ2FskiiQ_l_", "outputId": "d023191a-b9bc-47fa-be99-375b81f02e8e" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "120000" ] }, "metadata": {}, "execution_count": 10 } ] }, { "cell_type": "code", "source": [ "for row in scoring.search(\"planets explore life earth\", 3):\n", " print(row[\"id\"], row[\"text\"], row[\"score\"])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "slLtmzfbRuf6", "outputId": "be7ba16e-fd6d-4c30-fb43-a4617bac21ca" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "16327 3 Planets Are Found Close in Size to Earth, Making Scientists Think 'Life' A trio of newly discovered worlds are much smaller than any other planets previously discovered outside of the solar system. 17.768332448130707\n", "16158 Earth #39;s #39;big brothers #39; floating around stars Washington - A new class of planets has been found orbiting stars besides our sun, in a possible giant leap forward in the search for Earth-like planets that might harbour life. 17.65941968170793\n", "16620 New Planets could advance search for Life Astronomers in Europe and the United States have found two new planets about 20 times the size of Earth beyond the solar system. The discovery might be a giant leap forward in 17.65941968170793\n" ] } ] }, { "cell_type": "markdown", "source": [ "The search above ran a BM25 search across the dataset. The search will return more keyword/literal results. 
With proper query construction, the results can be decent.\n", "\n", "Comparing the vector search results earlier and these results are a good lesson in the differences between keyword and vector search." ], "metadata": { "id": "BOW5JlxYS3Rm" } }, { "cell_type": "markdown", "source": [ "# Database and Scoring\n", "\n", "Earlier we showed how the `ann` and `vectors` components can be combined to build a vector search engine. Can we combine the `database` and `scoring` components to add keyword search to a database? Yes!" ], "metadata": { "id": "gqEBeBEoTrup" } }, { "cell_type": "code", "source": [ "def search(query, limit=3):\n", " # Get similar clauses, if any\n", " similar = database.parse(query).get(\"similar\")\n", " return database.search(query, [scoring.search(args[0], limit * 10) for args in similar] if similar else None, limit)\n", "\n", "# Rebuild scoring - only need terms index\n", "scoring = ScoringFactory.create({\"method\": \"bm25\", \"terms\": True})\n", "scoring.index((x, text, None) for x, text in enumerate(texts))\n", "\n", "for row in search(\"select id, text, score from txtai where similar('planets explore life earth') and label = 0\"):\n", " print(row[\"id\"], row[\"text\"], row[\"score\"])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "MEpZAr0TUMCK", "outputId": "b320604b-f2a0-4546-c91d-2d2b0c449382" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "15363 NASA to Announce New Class of Planets Astronomers have discovered four new planets in a week's time, an exciting end-of-summer flurry that signals a sharper era in the hunt for new worlds. While none of these new bodies would be mistaken as Earth's twin, some appear to be noticeably smaller and more solid - more like Earth and Mars - than the gargantuan, gaseous giants identified before... 
12.582923259697132\n", "15900 Astronomers Spot Smallest Planets Yet American astronomers say they have discovered the two smallest planets yet orbiting nearby stars, trumping a small planet discovery by European scientists five days ago and capping the latest round in a frenzied hunt for other worlds like Earth. All three of these smaller planets belong to a new class of \"exoplanets\" - those that orbit stars other than our sun, the scientists said in a briefing Tuesday... 12.563928231067155\n", "15879 Astronomers see two new planets US astronomers find the smallest worlds detected circling other stars and say it is a breakthrough in the search for life in space. 12.078383982352994\n" ] } ] }, { "cell_type": "markdown", "source": [ "And there it is, scoring-based similarity search with the same syntax as standard txtai vector queries, including additional filters!\n", "\n", "txtai is built on vector search, machine learning and finding results based on semantic meaning. It's been well-discussed from a functionality standpoint how vector search has many advantages over keyword search. The one advantage keyword search has is speed. " ], "metadata": { "id": "aPaZsoxnYW8I" } }, { "cell_type": "markdown", "source": [ "# Wrapping up\n", "\n", "This notebook walked through each of the packages used by an Embeddings index. The Embeddings index makes this all transparent and easy to use. But each of the components do stand on their own and can be individually integrated into a project!" 
], "metadata": { "id": "4L8smyyXc8q8" } } ] } ================================================ FILE: examples/38_Introducing_the_Semantic_Graph.ipynb ================================================ [File too large to display: 1.6 MB] ================================================ FILE: examples/39_Classic_Topic_Modeling_with_BM25.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "gpuClass": "standard" }, "cells": [ { "cell_type": "markdown", "source": [ "# Classic Topic Modeling with BM25\n", "\n", "txtai 5.0 introduced topic modeling via [semantic graphs](https://neuml.hashnode.dev/introducing-the-semantic-graph). Semantic graphs can be easily integrated into an embeddings instance to add topic modeling to a txtai index.\n", "\n", "In addition to transformers-backed models, txtai also has support for traditional indexing methods. Given the modular design of txtai, traditional scoring methods like BM25 can be combined with graphs to build topic models. \n", "\n", "This notebook is all classic Python code on the CPU. No GPUs or machine learning models required!" ], "metadata": { "id": "-xU9P9iSR-Cy" } }, { "cell_type": "markdown", "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ], "metadata": { "id": "shlUi2kKS7KT" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "xEvX9vCpn4E0" }, "outputs": [], "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[graph] datasets" ] }, { "cell_type": "markdown", "source": [ "# Load dataset\n", "\n", "This example will use the `ag_news` dataset, which is a collection of news article headlines." 
], "metadata": { "id": "408IyXzKFSiG" } }, { "cell_type": "code", "source": [ "from datasets import load_dataset\n", "\n", "dataset = load_dataset(\"ag_news\", split=\"train\")" ], "metadata": { "id": "IQ_ns6YvFRm1" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Build BM25 Index\n", "\n", "Since the original txtai release, there has been a `scoring` package. This package supports building standalone BM25, TF-IDF and/or SIF text indexes." ], "metadata": { "id": "-vNVSA2FQnKj" } }, { "cell_type": "code", "source": [ "from txtai.scoring import ScoringFactory\n", "\n", "# List of all text elements\n", "texts = dataset[\"text\"]\n", "\n", "# Build index\n", "scoring = ScoringFactory.create({\"method\": \"bm25\", \"terms\": True})\n", "scoring.index((x, text, None) for x, text in enumerate(texts))\n", "\n", "# Show total\n", "scoring.count()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "LJ2FskiiQ_l_", "outputId": "fc4685d9-5ec2-4fbe-a347-857a74b9b509" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "120000" ] }, "metadata": {}, "execution_count": 3 } ] }, { "cell_type": "markdown", "source": [ "Let's test the index." ], "metadata": { "id": "BOW5JlxYS3Rm" } }, { "cell_type": "code", "source": [ "for id, score in scoring.search(\"planets explore life earth\", 3):\n", " print(id, texts[id], score)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "slLtmzfbRuf6", "outputId": "27aca9cb-8704-475c-c38d-01da65328686" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "16327 3 Planets Are Found Close in Size to Earth, Making Scientists Think 'Life' A trio of newly discovered worlds are much smaller than any other planets previously discovered outside of the solar system. 
20.72295380862701\n", "16158 Earth #39;s #39;big brothers #39; floating around stars Washington - A new class of planets has been found orbiting stars besides our sun, in a possible giant leap forward in the search for Earth-like planets that might harbour life. 19.917461045326878\n", "16620 New Planets could advance search for Life Astronomers in Europe and the United States have found two new planets about 20 times the size of Earth beyond the solar system. The discovery might be a giant leap forward in 19.917461045326878\n" ] } ] }, { "cell_type": "markdown", "source": [ "Results look as expected. BM25 returns keyword-based results vs contextual matches." ], "metadata": { "id": "XnAhNb8Td6Wd" } }, { "cell_type": "markdown", "source": [ "# Build topic model\n", "\n", "Now that we have a scoring index, we'll use it to build a graph.\n", "\n", "Graphs have built-in methods to insert nodes and build a relationship index between the nodes. The `index` method takes a search parameter that can be any function that returns (id, score) pairs. This logic is built into embeddings instances. \n", "\n", "Graphs constructed via a BM25 index will have more literal relationships. In other words, it will be keyword-driven. Semantic graphs backed by embeddings will have contextual relationships.\n", "\n", "The next section builds a graph to support topic modeling. We'll use a multiprocessing pool to maximize CPU usage." 
], "metadata": { "id": "gqEBeBEoTrup" } }, { "cell_type": "code", "source": [ "import os\n", "\n", "from multiprocessing import Pool\n", "\n", "from txtai.graph import GraphFactory\n", "\n", "# Multiprocessing helper methods\n", "SCORING = None\n", "\n", "def create(search):\n", " global SCORING\n", "\n", " # Create a global scoring object\n", " SCORING = search\n", "\n", "def run(params):\n", " query, limit = params\n", " return SCORING.search(query, limit)\n", "\n", "def batchsearch(queries, limit):\n", " return pool.imap(run, [(query, limit) for query in queries])\n", "\n", "# Build the graph\n", "pool = None\n", "with Pool(os.cpu_count(), initializer=create, initargs=(scoring,)) as pool:\n", " graph = GraphFactory.create({\"topics\": {}})\n", " graph.insert((x, text, None) for x, text in enumerate(texts))\n", " graph.index(batchsearch, None)" ], "metadata": { "id": "MEpZAr0TUMCK" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Let's list the top 10 topics. Keep in mind this dataset is from 2004." ], "metadata": { "id": "aPaZsoxnYW8I" } }, { "cell_type": "code", "source": [ "list(graph.topics)[:10]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "HyKiVFBerQfw", "outputId": "fdba468b-bac1-4f63-c915-5fcad78880f7" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['kerry_bush_john_president',\n", " 'nhl_players_league_lockout',\n", " 'arafat_yasser_palestinian_leader',\n", " 'sharon_ariel_prime_minister',\n", " 'blair_tony_minister_prime',\n", " 'xp_windows_microsoft_sp2',\n", " 'athens_gold_medal_olympic',\n", " 'space_prize_million_spaceshipone',\n", " 'nikkei_tokyo_reuters_average',\n", " 'hostage_british_bigley_iraq']" ] }, "metadata": {}, "execution_count": 10 } ] }, { "cell_type": "markdown", "source": [ "Topics map a list of ids for each matching text element ordered by topic relevance. Let's print the most relevant text element for a topic." 
], "metadata": { "id": "BKPJuq6br-ru" } }, { "cell_type": "code", "source": [ "uid = graph.topics[\"xp_windows_microsoft_sp2\"][0]\n", "graph.attribute(uid, \"text\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 54 }, "id": "sYd9nKqYrwhe", "outputId": "0db63a74-2c01-433a-d754-6f0988894ffe" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'Microsoft continues Windows XP SP2 distribution Continuing the roll-out of Windows XP Service Pack 2 (SP2), Microsoft Corp. on Wednesday began pushing the security-focused update to PCs running Windows XP Professional Edition '" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 44 } ] }, { "cell_type": "markdown", "source": [ "# Graph analysis\n", "\n", "Given this is a standard txtai graph, analysis methods such as centrality and pagerank are available." ], "metadata": { "id": "muN33RNB0Kob" } }, { "cell_type": "code", "source": [ "centrality = list(graph.centrality().keys())\n", "print(\"Top connection count:\", [len(graph.edges(uid)) for uid in centrality[:5]], \"\\n\")\n", "\n", "# Print most central node/topic\n", "print(\"Most central node:\", graph.attribute(centrality[0], \"text\"))\n", "\n", "topic = graph.attribute(centrality[0], \"topic\")\n", "for uid in graph.topics[topic][:3]:\n", " print(\"->\", graph.attribute(uid, \"text\"))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "hpwfJFcRv19j", "outputId": "0bb4d53d-27c2-42f5-cfaf-89c785cb73da" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Top connection count: [30, 30, 28, 28, 28] \n", "\n", "Most central node: Manning Gets Chance to Start Giants Coach Tom Coughlin announced that rookie quarterback Eli Manning will start ahead of two-time M.V.P. 
Kurt Warner in Thursday's preseason game against Carolina.\n", "-> Manning Replaces Warner As Giants QB (AP) AP - Eli Manning has replaced Kurt Warner as the New York Giants' starting quarterback.\n", "-> Eli Manning replaces Warner at quarterback Eli Manning, the top pick in this year #39;s NFL draft, has been named the starting quarterback of the New York Giants. Coach Tom Coughlin made the announcement at a Monday news conference.\n", "-> Giants to Start Manning Against Carolina (AP) AP - Eli Manning is going to get a chance to open the season as the New York Giants' starting quarterback.\n" ] } ] }, { "cell_type": "markdown", "source": [ "Notice the correlation between the number of connections and centrality.\n", "\n", "Given that BM25 is keyword-driven, we expect that the most central node would be text that is duplicative in nature. And that is the case here." ], "metadata": { "id": "lqOdJR6a0ftB" } }, { "cell_type": "markdown", "source": [ "# Walk the graph\n", "\n", "Just like semantic graphs, relationship paths can be explored." ], "metadata": { "id": "8u_tTNrq2EoN" } }, { "cell_type": "code", "source": [ "from IPython.display import HTML\n", "\n", "def showpath(source, target):\n", " path = graph.showpath(source, target)\n", " path = [graph.attribute(p, \"text\") for p in path]\n", "\n", " sections = []\n", " for x, p in enumerate(path):\n", " # Print start node\n", " sections.append(f\"{x + 1}. {p}\")\n", "\n", " return HTML(\"<br/><br/>\".join(sections))" ], "metadata": { "id": "oodIUmwrsZRd" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "showpath(83978, 8107)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 208 }, "id": "gZzStey9tEkl", "outputId": "8e6398d5-0fd8-465c-ce95-45b98eb2c195" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "<IPython.core.display.HTML object>" ], "text/html": [ "1. NFL Game Summary - NY Jets at Buffalo Orchard Park, NY (Sports Network) - Willis McGahee ran for 132 yards and a touchdown to lead the Buffalo Bills to a 22-17 victory over the New York Jets at Ralph Wilson Stadium.<br/><br/>2. NCAA Game Summary - Marshall at Georgia Athens, GA (Sports Network) - Michael Cooper ran for the only touchdown of the game, as third-ranked Georgia rode its defense to a 13-3 victory over Marshall at Sanford Stadium.<br/><br/>3. NCAA Game Summary - Northwestern at Wisconsin Madison, WI (Sports Network) - Anthony Davis ran for 122 yards and two touchdowns to lead No. 6 Wisconsin over Northwestern, 24-12, to celebrate Homecoming weekend at Camp Randall Stadium.<br/><br/>4. NCAA Top 25 Game Summary - Northwestern at Minnesota The last time Minnesota won four games to start three consecutive seasons was 1934-36...Chris Malleo replaced Basanez for two series in the third quarter for his first career appearance.<br/><br/>5. UConn ousts Marist Sophomore Steve Sealy netted his third winning goal in the last four games, giving Connecticut a 2-1 overtime victory over Marist yesterday in an NCAA Division 1 first-round men's soccer playoff game at Morrone Stadium in Storrs, Conn.<br/><br/>6. United States upsets Germany to move to soccer semifinals Deep into overtime, and maybe the last time for the Fab Five of US women #39;s soccer, the breaks were going against them. A last-gasp goal that stole victory in regulation, a wide-open shot that bounced off the goal post." ] }, "metadata": {}, "execution_count": 43 } ] }, { "cell_type": "markdown", "source": [ "Notice how the data pivots from the start node to the end node. If you've read the [Introducing the Semantic Graph](https://neuml.hashnode.dev/introducing-the-semantic-graph) article, you'll notice how this traversal is more literal in nature. In other words, the relationships are keyword-driven vs contextual." ], "metadata": { "id": "5qv1CbY35uUy" } }, { "cell_type": "markdown", "source": [ "# Wrapping up\n", "\n", "This notebook demonstrated how graphs can index traditional indexes such as BM25. This method can also be applied to an external index provided a search function is available to build connections.\n", "\n", "Semantic graphs backed by embeddings instances have a number of advantages and are recommended in most cases. But this is a classic way to do it - no machine learning models required!" ], "metadata": { "id": "4L8smyyXc8q8" } } ] } ================================================ FILE: examples/40_Text_to_Speech_Generation.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "4Pjmz-RORV8E" }, "source": [ "# Text to speech generation\n", "\n", "Text To Speech (TTS) models have made great strides in quality over the last few years. 
Unfortunately, it's not currently possible to use the libraries behind these models without installing a large number of dependencies.\n", "\n", "The txtai TextToSpeech pipeline has the following objectives:\n", "\n", "- Fast performance both on CPU and GPU\n", "- Ability to batch large text values and stream them through the model\n", "- Minimal install footprint\n", "- All dependencies must be Apache 2.0 compatible\n", "\n", "This notebook will go through a set of text to speech generation examples.\n", "\n" ] }, { "cell_type": "markdown", "metadata": { "id": "Dk31rbYjSTYm" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. Since this notebook is using optional pipelines, we need to install the pipeline extras package. We'll also demonstrate running this pipeline as an application." ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "id": "XMQuuun2R06J" }, "outputs": [], "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline-audio,pipeline-data] onnxruntime-gpu librosa\n", "\n", "# Install NLTK\n", "import nltk\n", "nltk.download('averaged_perceptron_tagger_eng')" ] }, { "cell_type": "markdown", "metadata": { "id": "PNPJ95cdTKSS" }, "source": [ "# Create a TextToSpeech instance\n", "\n", "The TextToSpeech instance is the main entrypoint for generating speech from text. The pipeline is backed by models from the [ESPnet](https://github.com/espnet/espnet) project. ESPnet has a number of high quality TTS models available on the [Hugging Face Hub](https://huggingface.co/models?library=espnet&pipeline_tag=text-to-speech&sort=downloads).\n", "\n", "This pipeline can use the following models on the Hugging Face Hub.\n", "\n", "- [ljspeech-jets-onnx](https://huggingface.co/NeuML/ljspeech-jets-onnx)\n", "- [ljspeech-vits-onnx](https://huggingface.co/NeuML/ljspeech-vits-onnx)\n", "- [vctk-vits-onnx](https://huggingface.co/NeuML/vctk-vits-onnx)\n", "\n", "The default model is `ljspeech-jets-onnx`. 
Each of the models above is an ESPnet model exported to ONNX using [espnet_onnx](https://github.com/espnet/espnet_onnx). More on that process can be found in the links above.\n" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "id": "nTDwXOUeTH2-" }, "outputs": [], "source": [ "%%capture\n", "\n", "from txtai.pipeline import TextToSpeech\n", "\n", "# Create text-to-speech model\n", "tts = TextToSpeech()" ] }, { "cell_type": "markdown", "metadata": { "id": "-vGR_piwZZO6" }, "source": [ "# Generate speech\n", "\n", "The first example shows how to generate speech from text. Let's give it a try!" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 465 }, "id": "-K2YJJzsVtfq", "outputId": "28fd09d8-73e1-4c07-ae71-09397081631c" }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "iVBORw0KGgoAAAANSUhEUgAABMkAAAHACAYAAAC4Zz/7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAACkeElEQVR4nOzdd3gUZdcG8HvTe0IIhBYIvfdepUlV4VOxISp2BRu+voIFVFTsYkFRLKivCCpiQYp0pPfeS+hJCCGd1N3vj5CQsmV2d2aeKffvurgkm9ndI8nuzpznPOdYbDabDURERERERERERCbmIzoAIiIiIiIiIiIi0ZgkIyIiIiIiIiIi02OSjIiIiIiIiIiITI9JMiIiIiIiIiIiMj0myYiIiIiIiIiIyPSYJCMiIiIiIiIiItNjkoyIiIiIiIiIiEyPSTIiIiIiIiIiIjI9P9EByM1qteL8+fMIDw+HxWIRHQ4REREREREREQlks9mQmZmJWrVqwcfHcb2Y4ZJk58+fR1xcnOgwiIiIiIiIiIhIQ86cOYM6deo4/L7hkmTh4eEAiv/HIyIiBEdDREREREREREQiZWRkIC4urjRn5IjhkmQlWywjIiKYJCMiIiIiIiIiIgBw2ZaLjfuJiIiIiIiIiMj0mCQjIiIiIiIiIiLTY5KMiIiIiIiIiIhMj0kyIiIiIiIiIiIyPSbJiIiIiIiIiIjI9JgkIyIiIiIiIiIi02OSjIiIiIiIiIiITI9JMiIiIiIiIiIiMj0myYiIiIiIiIiIyPSYJCMiIiIiIiIiItNjkoyIiIiIiIiIiEyPSTIiIiIiIiIiIjI9JsmIiIiIiIiIiMj0mCQjIiIiIiIiIiLTY5LMZPq9txrztp4WHQYRERERERERkaYwSWYyJ1OyMX35UdFhEBERERERERFpCpNkJnQhPRfZeYWiwyAiIiIiIiIi0gwmyUxq5prjokMgIiIiIiIiItIMJslM6tSlHNEhEBERERERERFpBpNkRERERERERERkekySERERERERERGR6TFJRkREREREREREpsckGRERERERERERmZ6f6ABIHX/sOoftpy6LDoOIiIiIiIiISJOYJDOJp+buEh0CEREREREREZFmcbslERERERERERGZHpNkRERERERERERkekySERERERERERGR6TFJRkREREREREREpsckGRERERERERERmR6TZEREREREREREZHpMkhERERERERERkekxSUZERERERERERKbHJBkREREREREREZkek2RERERERERERGR6TJIREREREREREZHpMUlGRERERERERESmxyQZERERERERERGZHpNkRERERERERERkekySERERERERERGR6TFJRkREREREREREpsckmUn9ufs8TlzMEh0GEREREREREZEmMElmYptOpIoOgYiIiIiIiIhIE5gkIyIiIiIiIiIi02OSzMT2nksXHQIRERERERERkSYwSWYCS/Yl2r39py2ncSH9isrREBERERERERFpjypJshkzZiA+Ph5BQUHo2rUrtmzZIul+c+fOhcViwciRI5UN0OA+X33M4fdy8otUjISIiIiIiIiISJsUT5LNmzcPEyZMwJQpU7Bjxw60bdsWgwcPRnJystP7JSQk4D//+Q969+6tdIiGZ3PyvX+PXFQtDiIiIiIiIiIirVI8SfbBBx/goYcewtixY9GiRQvMnDkTISEh+Oabbxzep6ioCKNHj8arr76KBg0aKB2iqb3y1wHRIRARERERERERCadokiw/Px/bt2/HwIEDrz2hjw8GDhyIjRs3Orzfa6+9hurVq+OBBx5w+Rx5eXnIyMgo94fKyy+0ig7BLpvNBqvVWZ0bEREREREREZE6FE2SpaSkoKioCLGxseVuj42NRWKi/Wby69atw9dff41Zs2ZJeo5p06YhMjKy9E9cXJzXcRtNzcgg0SHYNXH+Xtw6c4PoMIiIiJw6dSkb7
y49hDmbT4sOhYhIFeuPpWDR3guiwyAiUp2f6ADKyszMxJgxYzBr1izExMRIus+kSZMwYcKE0q8zMjKYKNOJedvOiA6BiIjIpbHfbsWJlGwAwF1d6wqOhohIeaO/2gwASHhruOBIiIjUpWiSLCYmBr6+vkhKSip3e1JSEmrUqFHp+OPHjyMhIQE33nhj6W1Wa/FWQT8/Pxw+fBgNGzYsd5/AwEAEBgYqED0RERERShNk9mw5mYrPVx/D1/d2ho+PRcWoiIiIiEhuim63DAgIQMeOHbFixYrS26xWK1asWIHu3btXOr5Zs2bYu3cvdu3aVfrnpptuQr9+/bBr1y5WiClkyh/7RIdARESkS+8tPYxVhy8it7BIdChERERE5CXFt1tOmDAB9957Lzp16oQuXbpg+vTpyM7OxtixYwEA99xzD2rXro1p06YhKCgIrVq1Knf/qKgoAKh0O8nnu42nEOjvixeGNRcdChERka4UXK14Hzx9Lf4Y1wvRoQGCIyIiks+mE5fQrUFV0WEQEalG0UoyALj99tvx3nvvYfLkyWjXrh127dqFJUuWlDbzP336NC5cYFNI0b5ce0J0CERERLp1JvUKOkxdhh2nL4sOhYhINnd8uUl0CEREqlKlcf/48eMxfvx4u99bvXq10/vOnj1b/oCIiIiIFHAsKQsd6laRfPzPW88AFuC2TmwpQURERCSa4pVkRERERGbx3/l7cDQp063j//vrHgUjIiIiIiKpmCSjUvO2nhYdgu5sOJ6CGauOiQ6DiIg0ZPG+REnHzV5/svTv98/eqlQ4RERumTBvl+gQiIiEYZKMSj0/f6/oEHTn0R+2492lh0WHQUREOvTKXwdK/77yULLASIiIrvlt57lKtxUWWQVEQkSkPibJiLyQkVsoOgQiIlJRana+R/dbsu8C+r23GkVWm8Nj/vvrbthsjr9P2vPKn/ux5WSq6DCIFPXETzvR6MXFOJSYIToUIiLFMUlGREREJFHvd1aW/r2wyIrkjDxJ9/t8zQmcTMlGfqHjaoyft511mkQj7Zm9IQET57OnHBnbX7vPAwCOJGWV3paSlYfE9FxRIRERKYZJMiIiIiKJsvOKSv8+bfEhnEu74vDYwiLrtcqwChViuQVFdu5BenQiJRsJKdmiwyBSXNn3rTu+3IThn/wrMBoiImUwSUZCcDsJERHp3RIHDfqXH0zC7V9sRKMXF2PUzI3Ydy699Hu/7TyLwiIrmr28xO59U3M8285JYn2w7Iik4w5eyMDdX23GlXwmSUl//vvrHry56CAycwtwLDkLl7L4fkVExuMnOgAyp5+2nBEdgqwupF9Bzchg0WEQEZEG7Dl7LSm27dRl3PDJutKvX1ywDw1iwhzel2tI+hQS4CvpuNnrE7DuWArOpeWgUfVwhaMikt+Xa0/gYqa0beZERHrESjISYluCsZrcJqTkiA7BMB7/cTvmbD4tOgwiIsXkFjquInpxwT40e3kxVh5KUjEi8pa/r7RTauvVLOiEn3crGQ6RotYfSxEdAhGRYpgkIyHsjZbWs2CJK8jk2qK9iXhhwV7RYRARCbH8YBJyC6z4eetZ0aGQC3O3SFvQOZOag4IiK45fzMIv24t/rmWrDYmIiEg7mCQjkoHUbRYk3ay1J0SHQEQKYV9K0ru8wiJM/M31gk5+oRW931mFD5cdwbytxmo1Qfp3JjUHN3+2ARfSHQ8gceX4xSy8veQQ39eJyDCYJCMiTXpj0UGP7vf9xgTsO5eOtxcfwvv/HJY5KiLy1l+7z6Ptq/8gz8mWQy05mpQpOgTSgbQrBXZvf/C7rQCAf4+mMIlAmrPyUDJ2nL6MzSeutUH5e88Ftx5j2qJD+Hz1cWRzGAURGQSTZERkCDabDe//cxiT/9iPJ+fuxOdrjuOTlcdEh0VEFfy05TQycguRW2AVHYokD32/rdJt4+fsEBAJadlfu8/jZEp2pdvXHmXvJtKXcRLe35LLNO5Pv8IJl0RkLEySEZEhX
M4pKE2K2btQISLyRMKlyoNZFrpZaVHRphOXvLo/aVNSRq7oEIiIiMhLTJIRERERueloUibOpXnWx+eLNey5aEQfLDvCLZWkW/O3n0VKVp7rAysoWxVss9kw+Y992Hn6spyhERGpikkyIi80qh4mOgRDk9KzKCUrDxPn70F2XqEKERHpV1pOPib9thcZufZ7J6ml0GqMJMJjP3LLJZW35WQqDiVe62H3x65rk7z3nktHQZExfvfJeF75az+e/WU33l3ifi/XvefKT2r9fuMpTPlzv1yhERGpjkkyIi+EcqqloibM2+3ymN93nsPcrWfw8zZODSNyZtHeRPy05TRWHkwWFsM3605iy8lU1wfqwLHkLEUff8n+RPx79KKiz0Hy23UmDUDxVMun5u4q973ZGxJUj4dIirSc4sWTM5crby+XKiv32mJlhoNBFkREesAkGRFp1t97nff9WXPkIl7/u3gKpqMm/RuPs/cPEQAUWa1X/yuumuV/m08Je249eu6XPaJDIA/dNWuT6BCIVNVt2gocSVJ28YCISA1MkhGRbq094rrK4k5eqBABAL5Yyz5YeiN6ayy578TFLHy97iS2nWJPJjKfIR+tFR0CEZHXmCQjIsOzWm1IzuTUMTKvK/lFOHvZsybzSnn5932iQyCS3ax/T2LqwgOSjr35sw0KR0OkLs6tICIjYJKMiAyn4knazLXH0eWNFbiS73oQABGp48/d50WHQOSxe7/Z6vVj7OAEQCIiIs3xEx0AEZG7Vh1Kxu+7ziEmLFDS8f8eSQEA5BdZEQwOWyAiIu9sOsF+l6RfKVl52JbAJC0RkT1MkhGR7kz6bS8SM7h9ksgeq9WGr9adwC0d6qCqxEQyERGZxxt/H8SCnefw4rDmokMhItIcbrekcrLyCl0fZGJFVhumLT6ImWuOiw7F1LLz3fs93cgVfzKRk5ey8eaiQ2zUT6QD59O01SuQzOFIUiYA4I1FB8vdvkGmieAJl3Lw8YqjsjwWEZHamCSjcthI2bndZ9PwxZoTeGvxIRRZrzW+stmA/EKrwMiMraDIyn9fE9pzNg0DP1iDS1l5okPRFdvVpnyXs/MFR0JEroyYsd7t+xQWWXH0apKDpJuz+TQOJ/LfTS0fLDsiOgQiIo8wSUblrDlyUXQImlbgIFHzzpJD6Pn2SpWjMYeM3ALc8cUm3PHlJtGhkMoW7rmAY8lZ6Pj6chQWMUlKRMZzMTMPqdn5yCuUPljmx82ncf2Ha3GOVWhueWHBXgyevhb7zqWLDkW4An6mEhE5xCQZkRd2ny0+0VpxKBkXM1ntooSRn67H9tOXseP0Zfy87QxaTVmKzFxuCzabgiLOlXfmZEo2Fu29AACYs/mM4GiMY/95XkyT8nq9vRIT5u2WfPzBCxkAgCx+Fnrk+fl7RIcgXM3IYNEhEBFpFpNkRKRpJ1KyS//+31/3sG8ekR1P/LQDj/+4AwDwzfqTgqMxjru/2iw6BDKBnPwi/H01yU3KS8rgoqZarFYucBGR/jBJRkREpHP7zmWIDsGQLucUCH1+q81W2mOOiEhv7vt2i+gQiIjcxiQZEWnGD5tOiQ6BSNfKrtpn5Bbg953nmGTRsdwCK17kQB26KjkjFz3fWom5W7mlmvRh7dEU0SEQEbmNSTKiCrYlpOLAeddVGal2JscVsazcK5yuSo6kcMKlJHfMujbgYun+JDw9bxcSM3IFRkTemrP5tOgQSCP2n89gs34iIiKFMUlGVMGtMzfi1pkbXB5ntVOdseZIshIhEZle73dWsSJKgi0nUyvdVsihB0REpa7kF7JXFhEROcQkGalOD1Mgc/Klj2Iv6/1/jsgcCRGR985czhEdAhG5kFfo/NwjPMhPpUiMLTu/CK8tPCA6DCIi0igmyUh1321IEB2CYrjdkoi0aNCHa0WHQEQuvLX4kNPvB/r5qhSJ8c028Lmo1jwxZ4foEIiI3MIkGakuO79QdAhEREREmrLqEFs2KGUye54K89eeC
6JDICJyC5NkpDq2FSIib+05m4asPCbc/9mfiDavLHV6THKFLe4reSFOpEsfLDtc7utX/9rPCnaJvuf0bCIikohJMlIdS9yJyBsPfrcVN326Hq/9tV90KMLN33EWGbnOk4W3fF5+EMnfey8gmRMvXfpkxVHRIRCVs+rwxXJfbzh+iZN/iYiIZMYOoCbARUYiMoL//LIbeYVWLD9YXAm12c4kR5KmgB8MLr2/jINYiPTuQvoVVAkJEB2G6a05chHXNakmOgwiIkmYJDOB6uGBokMgIvLa/B3nRIdAREQ68ev2s/jPL7sxol0t0aGY3r3fbMGR14ciwI+bmIhI+/hOReSCzWZjzw8ijdtzNg0T5++BjU0PichEMl1stzaz//yyGwDwx67zgiMhALDy85mIdIJJMoO7lJWHX7afFR2Grk34eTdunbnB9YGkeR8tPworE56GcuJiFm75fAOembcLc7eeQV6hVXRIRESyc7QAMPCDNcgtKFI5Gv2LCvEXHQIREWkUk2QGV7HJK7lvwc5z2Hk6DQCw7ECS2GDIK9+sP4kzl3NEh6F5+YVWbDpxSXQYLuUVWNH//TXYfuoyjl/MFh2O6nLyC7F0P9+TiNT289Yzqj+nswEd+UVcHCAiIpILk2QGFxboKzoEQ/lq3UnRIZCXWO3v2pzNp3DHl5uw4qC2EzCJJp/QuHD3BdEhEJnSf+fvER0CERERKYRJMoML9GeSTCklvS6IjCbhUnG13QPfbUOBzioUvjZRIjs73/NeRL4Wi4yREJEcEi7lIN/BlvHJf+xTORoiIiJzYpKMyEP/Hk0RHQKR7Gw2G86lXSn9Wm+Ndt9dehgA8NOW09hwnK9RR5gjI9KmT1cdq3Tb3V9tZvN5meUVWDnohYiI7GKSjIiISu0+m16u994rfx4QGI1n/th1DpN+24sxX28RHQop4O0lh3DXrE2iwyBSxM7Tlyvdtu4YE/7u2pqQ6vT7VwqK8OGyIypFQ0REesIkmcHtOZMuOgSX9p7VTox621pGJLfE9Cvlvv5py2lBkXjuqbm7AABFnGRqSJ+vPo4Nx7U/WEIJVqsNBy9kKF4Bk1tQhEV7L7DSRoBAP56ay2HUzI0uj/l2Q4LygVApb99OluxLxImLWfIEQ0TkBD+JDaywyIoPl2t/lWzsbO1UexxOzBQdAhHJKNnkzf2NzIw/25WHkjH0o3+x6YTzKhlv/bTlNB7/cQf2aGgRi4j0rfnkJeXaObjr0f9tx+M/7pAxIiIi+5gkMzC9rP+mZOWLDkE2qdnG+X8hMoKJv+0VHQIpJN9klb/pOQX4cu0JAEByprIJwqSMPADFW9KIiORy+upgIE8dKrOY/d7Sw3j5dw60ICL5MUlGqjqZki06BEUlZ+aJDoGIylh5KBlnUr07KSfSgg+XH8EWF32WiIiMyN7W709XHcMPm04JiIaIjI5JMlLV7zvPiQ6BiNxUqPOKnY9XHBUdApHXTl1SfpGJffz0adWhZNEhaMrNn20QHQIREekYk2SkqujQANEhEJGbBk9fKzoEr2TnF4oOgRxIy8lHVh5/Plqw6cQlNHpxUblkXA5fO8L9su2My2NKhpVQsR12JoSSMYQG+Fa6LTHdfP0piUhZTJKRqvx8LaJDICI3Hb9o7G3SJM4Nn6zDPV9vFh1GJbOu9v7SiveXHlb8OTafSIXNBpxIyUbu1V5k7y7V/vAfo1l+MLlcf9Pnft0jMBoi7eNCGBHJjUkyIiIik8nVSEP2s5evYMfptNKvd51JQ593VuGi4P6Obyw6KPT5K/pk1TFVn2/R3gsAgIMXMlR9Xir2I/ssEYCElGzkF+q73YFSjiZxGj0RKYdJMiIHuM2EzCa3oAiP/o/j1fUiJcvzRNJ1766WLxAZLd57AadTcyQNefHzMe8pjNL/7xxCI1YWzz88Nlnn0w6tVhu++vcEkjJy0fe91XjvH+WrSLXuZEo27vt2a7nb3lWhupaIzMu8Z5gkxOFE/az8rD1yU
XQIpAC2pXYsJ18b1UXk2vZTlzFj1XGvHuPTlRxooCdlt+BFhfgLjIRIu753swovKSNXUwMrTqRk4fW/D6LrmysAAP8eTREckXhzt5zGmqvn5NlXz1N4vqIfBUVWrD+WYndCKZFWMUlG5aRm50tawffU9xv1s4WgUEMnTSQfb6pvjK5kixVp3+lU79+n3/uH/ab0ZPfZdNEhEDgB1EgycwvR9c0VmDh/j2a2Nep8mLQi8uz8bJS8ViF5Ldh5DqO/2lyutQKR1jFJRpV8soLVBWRcPpwdYVduQRFe0vk2FUcW7U1EXiFXnYnIey/9vlfx5/hizQmkXynAkn1cuFDDL9vP4vW/D4gOwy5W39jfhRIe5CcgEvJEyc8vI7dAcCRE0qmSJJsxYwbi4+MRFBSErl27YsuWLQ6PnTVrFnr37o0qVaqgSpUqGDhwoNPjSX5XNNLQmYjUYzX4ifjivYnlvh7x6Tq8r+NeL3L9uPS0BV5Nwf6+okMQZmyF3j9U3k9bzqjyPDtOXca4OTtVeS4Cft95TnQIdh1KzMQfu7QZmye+WX/S7WSJjY0ydMlms+HdpYfw9bqTko49ezmHSWHSDMWTZPPmzcOECRMwZcoU7NixA23btsXgwYORnJxs9/jVq1fjzjvvxKpVq7Bx40bExcVh0KBBOHfOOB8QRESkroV7zqPX2ytx5Wofk91n0/HJSnUnBsqpaligLI8zQ+Wpid76fmOC6BCIVHH8YpZbWzuPJTPh7Y3aVUJEh+DQU3N3Ycfpy6LDkMWyA0n4cdNpWR9zW0KqrI9H3ssvtKL3O6sk905dffgier29yjC/56R/iifJPvjgAzz00EMYO3YsWrRogZkzZyIkJATffPON3eN//PFHPP7442jXrh2aNWuGr776ClarFStWrFA6VCLNWH8sBRuOsVmrEuZuOYNcVkuazvKDyTh7+QrSrxSg0ABNX9Jy8l0fJIFW+vBINfmP/ao8DysXSLTX/z7o1vEDP1irUCSkBa/+qc57nxrSr3i37a5iJdrz85XfAk3uuZyTj7OXr0g6dmtCKmauKU6mnUvLVTIsIskUTZLl5+dj+/btGDhw4LUn9PHBwIEDsXHjRkmPkZOTg4KCAkRHRysVJpHmjP5qM+76arPoMAzpl+1nsZh9Xkxr7dGLaPTi4tKvn/15Fyb/sQ9ZeYUCo3JPek4Bnpq7S3QYhjV/+1nkFmg7eZiWw94uRHI6eCEDZ1JzRIfh0AmZG9XrbYGkrLcXHxIdAsnoti82YvNJVgOStiiaJEtJSUFRURFiY2PL3R4bG4vExEQH9yrv+eefR61atcol2srKy8tDRkZGuT9Eclh5KBkflxli8Nfu86rHwL35ysjM1U9ChOT131/3lPt6/o5z+H7jKfygo8m72fnG+/0tLLJq5v3uOx1s6Rw3ZwfWHLkoOgwiQ5mi4WqtwiJ53x9rRAbJ+nhKWbjnPDadKJ9A2XeOk36NRCMf/UTlaHq65VtvvYW5c+diwYIFCAqy/2Y+bdo0REZGlv6Ji4tTOUoyqt92nMMHy46Ufv3ET+o30G0+eQkuZeWp/rxm9cqf+7Ge21zteneJsVdu+ToTa+hH/xp2uqpSDidyUZBITmcvi60ks9lsmL/jrN3v+fvKO5rbopNJ3+M5vIKIBFA0SRYTEwNfX18kJSWVuz0pKQk1atRwet/33nsPb731Fv755x+0adPG4XGTJk1Cenp66Z8zZ9SZOkSkhtwCK5IyePGultkbEjDh512iw9CkGaulNV8l8sTR5Cz8uFneZs5Gl8zPBiJDuZiVhy/XnhAdBpEiJs7fg3S2CiCdUDRJFhAQgI4dO5Zrul/ShL979+4O7/fOO+9g6tSpWLJkCTp16uT0OQIDAxEREVHuD2lPkdWmma00zlyUoZpk71l5y8BXHkpyfZACLmbmGWrsuFSXTfoB/uRPu0SHQERu8PHRSSmIE7kFRdjKyXREAJxvO8vILURmrjznJ/vOpeO3HeY7v
yOxkjLysPSAtHZLRKIpvt1ywoQJmDVrFr777jscPHgQjz32GLKzszF27FgAwD333INJkyaVHv/222/j5ZdfxjfffIP4+HgkJiYiMTERWVlZSodKChoxYx0m/ab96TMf/HPE9UEuPPfrbhkiueY9GWLyxLRFB/HU3F24km+uSZD5hVZdNXGXy/KDYpKxWvHVupM4nyZtEhMRyePrdScxauZGJKZzohmRK3KdR5dtJaJHu8+m41BipugwyANWa/lM8CM/bBMUCZFzfko/we23346LFy9i8uTJSExMRLt27bBkyZLSZv6nT5+Gj8+1XN3nn3+O/Px83HrrreUeZ8qUKXjllVeUDpcUsu9cBvad037/FDkuki9mGmMLzMlLxZOUbNB+BaDcbv5sPf555jrRYZDK9p/PQK2oYNFhqCqnwFxJcNKWo0nFF7q5/D0kcmnVoWRZHievkK830oal+829QEvapXiSDADGjx+P8ePH2/3e6tWry32dkJCgfEBEOmO12gyxtUYvjiRlwWazIbfAiuAAX9HhmJaeR9TrxdojF5GQko34mFBVnk/uCW1ERHI5kpSFvWfT0bpOpOhQiHTp8R+3o35MKO7pHi86FCKvaHq6JREVm77iqMPvXUi/gj1n09QLxiS+XncSzScvwXcbEljlIMiJlGxVny/EpAnR8+nqbTM9xy2tmmS12vDhcn1vwSKSwztLjT3JmUhJi/YmYsaq47j18w0e3T+P59ukEUySEZWRmpMvOgS7lh1wXI78wOxtuOnT9SgsYtWNnP7YdR4AMOXP/fhz93nB0ZhTxhV1hyh8uOxIpX4ZJK8iGf99P1ph7qTO6Us5sj1WmsqvNSKtyisw/rkUewCS0s5c9mxBTAcz3sgkmCQjKkMPb85L9l0oNw3swIXiXm9vLebqp5yy868178/MLd/Iv7DIisd/3I795+WdZCpVWk4+ElSushLBV+UtxttOXUY6kwWKigkLlO2xftpyRrbH0qMl+40zJexQovZ7lhIZhcUitn3H8oNJuCRhmvy+c2LOscgzUxcecHlMHttokE4wSUaaYNNDdkoipf9PHv3fDtz+xcZKtxvpgkkLTlx0nITKyC3Eor2J+Gi5422wSnrkh+3o+95qIc9N5A1fiWcdz8/fg+83JqDPO6uUDYg04dH/7UBmLhPURGrwFZwkO5achdf/PujyuP/8Iu+0eFLW1+tOujxmyp/72cKEdIFJMtIEtXsPKSk127stmwV2tk0WFlkxe/3J0slGJTuWtp9KrXQsqSdf0BbXzSfl/bm/tdj1ySqRmk6mZOPDZUdwOlW+LYVGlKbRFgGeKOBQB9IAURO9bTYbVh92Pr2ywEDtAA6cz3C5QH4oMVOlaEhN/x5Ncfr9hXvOY8HOsypFQ2Qfk2SkCfYSQ0pJycrDhmPO36BFspdkO5qchVf+OoCxs7eW3maz2XDL55UrykgMPfeymrnmhOgQSKI9Z9Nw2UCJkYrKJp51/JJSzYsL9okOwWPHkjPx+y599XtkLyfj25pwWcjzHkvOwvPz9zo9xkgTnw8nZeINCdVkpA8pErbPliiyOv89Hj9nJ56ZxypCEotJMjKd8XN24K6vNosOw2v1Jy0SHYKpWK02u6ueVqsNX/17Am1f+0fW5/vq3xMY8P7qSrcfLrOympNfWOn7ZGw3fboewz9eJzoMjyw7kISkDOcn0t+uT1AnGIPYfPKS6BA8Nmez/nrKzVxzXHQIZFBXTLgF7SsJ2/NIH+77dosbR1vw4+ZT6P3Oykrf+e/8PfIFReQFJsnIVFYeSsKmE8Vb1X7eqr8TdBJj79k0DPloLV75cz8AlFbyrD58EQ1eWITX/z5Yrrn/lfwiDHh/NTYe9/wC9p0lh3HcTl+0O2dtKv07t6KRnjz0/TaP7rdkXyL7VTmQlSdPovztJeoOfimy2rCvwuATPVRIZvD3UNPY68i81NyRQpXtOyd9+IqPpbgK+kyqZxMwidTAJBmZyu4z107Ku
VpBUv2+6zyOJGXhu42nED/xbwx4f43T4y/n5OP4xWz8b9Mpj5/TWqZq7XzaFZy9XJwQ87bnHVFFamzhKfn99cSj/9uOz1ezgseeetGhsjzOPJUXjf7afR5bKvRW1MOW9axcVu+WWLIvEYM+XIMZq8QMsLFn2Ef/ig6h1OlLORj+8b84w8UsVXSfVrkqibRJ++/0REySkcKKrDYUafjEd9HeCwCAy9n5WLLvguBovHMlnyuoouUVFpUbWf733gvYe9b7Eea3fL4BI2esr3T7r9vY2FRue004cv6+b7e6PshLAz9wnlh2ZfspMX2CSBkJl/Q5rMedvjtG9+j/tuNIUhbeXXpEdCiltDQEauWhJOw/n4G+763GZS5uKU6Pr83py45g4R599WW0Z58Jz5vI+JgkI0Xd/dVmPOzhFhslfLSi/Irn9OXFJ3fvLD2ER/+3Q0RIlWR7uH3mUnY+jl/MkjkacsestSdwwyfryvUNm/DzLq8f90J6LlKyKp9kL96X6PVj/+BFtZsR3fONO301SKrcAu+q1bS71CKWHFuMzqWpu+UlPacA05drp/rIHXkab5w+b+tp0SHQVVeuvucVWW148XfnDfnd4U1VLmnH3rPpmL7iKMbP2Sk6FK89PW+X6BCIZMckmYFpIWGy8cQlrDjkfKS1HKxWG4Z//C/+2e9e0qCkyM1e7ydRZv3reSPTo0kcly1SSU+GslNIE9NzcTgxEyc08Hqs6GhSJl7+Xb/T8cjYym4Drbg1j4rJUTmjdr+3hXv1WzlRNSxQdAhOuZqO6K1Vh5MxauYGzFh1TNHnEc3ekB5vLNrr/YJWicd/1MaCrhr6Nq0mOgTF3PipPgfwlJWWk493lhzCsWT3zm/ZO5D0gEkyAxsyXTu9GZRWaLVh//kMPPzDdtGheM2bZIpWquHMaomDJO3g6Wtx6+cbFXlOb07mX/6DCTI9UbpnU6FCjY89PSEWNe1NDw3k5bT68EVVn69A49VYeudNL0xX3lt6GFsTLuPdpYfL3f7LNmMNQpKjYjDtijLvI3tkaOGgFyEBvqJDUMTO08ZoH/Dr9rP4zIN+oU/N3SXpuLcWH5I9YU0kFZNkZGoZV7Q3qapmZJDoEMo5nqydKjs9S/XwwnvP2TSH3zuXdgUtpyxFcmauR499MdO9Hh6TmVQTqu1r/yj22HvPpqPRi4tl6aFXUd93V8v+mErS28StdUdTvLr/W4vVnWxJynpJwepgRz1mn/uVg5DK+urfE/hizQnRYZBG/d9nG8p9/c06z3eQiKT09vOZa44jLUd712lkDkySkaklZ+Zh04lLosMox2KxeHX/DcdSZB2FbYSScJEyPewxV+IxF9WBOflFuJDmWZLMXd9vZP8ykTIVnKx34EJxcmz/efmTZIkZ6vx+mtUTP7GCmMp79H/6r6rXs9f/Pig6BN2Yv50DiF5beEB0CG5LzsytVFGqFpvNhiwvz62JXGGSjEzPaH287vpqM/7cpd+eL0Z2xIPfNSWbamcomHSRQ7Ma4aJDMI0VB5XvHUnKuMyVdqpgiQxDXSoqLLLiUKKxzpccYQ9E9Tz7y27RIWjCK3/u11Xi5/QlcQMkpi8/ilZTlgp7fjIHJslI12w2G175cz8OJWaIDkVTLqR7n1g5k8oJSnIb/rG2+gSma/ziukWtCNEhmMY/B5JEh0CkWF888t7crcbqO+aMp1tWC4us6PX2SrvfSzNZr0Nyz+wNCRg/h1XBFX22+lilBeb5O/RffXjjJ+vw5Vrn/dwKi6yK96Ilx5gkI13LLbBi9oYETPjZ85UoRz029Cxdhl5rSm7tskeJXkhaU1Dk+ndt0d4LGDJ9LawqNCsNCTRmU1wyJiO+V9M1G45fQqMXF+PUJfbB1CKjVd07Ex7k59H98gqtOHvZ/iKlXvtOiXQ5W9sLeZ5Yfdhx1bbaQ1S8sUylhbVZ/57EiwuUndqrlJz8QmxNsF+VuvdcOt5c5LwfaPupy/D0vF0KREZSMElGh
nApy3UDckfZ+E9XHddUpt67jmTFcgu8X43391X37YG9z4pNX34EhxIzoaFfSWF+2ab/1UK9mfjbXk29H5b19To2wjayzSeL+4OeurqNx2q1yVIVTepIMlDvwf3n5d+d4OnwHjPbqLGewXK479utbh3/9bqTLiuO1JZXWIQv1qr3eZyU4d6QKRFWHUpGz7dWIq/w2lTuj1ccw6iZG5GZ6zzZm5ieazfpmJlbiD93s32OKEySkSEE+Ln+Vb7s4AQlJStP9aopp2TIkvn6yJFqU0ZuQRFSs7V3suhsdU8Niem5GDljPU5cdL+KQq0VPTUVFlmx60ya6DA045Eftqn2XAVWbW5523LysugQSEFbE4p/vvmFVvyzPxHv/nMY3aetxKcrjwqOjADXi2+Dp6+V9Dip2fl45c/9yMnX0HmXHQkp7n8Wa3N5gfRs6sIDeHPRIew7l66ZNigqbHQo53RqDn7Zpu3t3pP/3IdzaVeQnXctSXbgQnGyvdDFLpLn5+/BQ9+XP8dj5bx4TJKRYg5ecL4Sp6U0zmETbSMQ7am5O9H7Hfs9O0RJy8l3e3VPblsSUrHrTBoKPfhg/HTVMQUiIi1Zut94iVB3LT/IfwMR1EpmXMwsrhZ48PttePiH7aUXRe/9cwQpEqrFSVm5ZSok7EmT2ONywc5zmL0hAS0mL/UoEaWWF393b4uX1WpDu1f/8fj5gvzZ/sDopAyEOHvZfiLshk/Wofc7q/DOEudb9NSQV6j+Qtpzv+5R/TndERnsX+m2Aon/Tvamio+csd7rmMg7TJLp1O4zaeVKOuXkqJ+Cu1yNBmaOXDmzNyQgMV2bWx+W7k9Cdl4R7vhiIybO34NxGmhU6kliSgQp24qlOpmSLfmihojM60yqvFsepV5gpWRdqzhWo0cjOSfXFNWCMsMZ1hzRbg+mPW72SS2wWp2eS7iqDFG7xQWp77YvNro8ZvRXmwEA7y09bPf8+LPV4rdean3ok0hlr83POEh4VlT2s67E3nPX3n++25DgdVzkPr4j69ClrDyMmLEeX65RZj94kL88vxbrjqXI8jhySRA4rliEZV5WXcg9ijohJRuvLzxQ+vWmk6mYu/UM/t5zQdbn0avzaa4vRPNlnPw2e722mwjzkphIumSd9IPady4d0xa7XwlhgQXbElJ5sSCQN70KC4usWH04GUeTMvGWBz9/ESKCKleGOONqYfKnLdreLqY1NpMmxk9dykGLyUvw6apjDs+Pr+QrUyQhVZ93Vwl9/orFHCcuZgn/N/GxFO+P+m3HOQDAqUvZpXFKPXd3dB0w5c/9MkRI7mKSTIdKXmwHE+VvLKo3+VdXpKV8lt7y+QaFo5FHyRutkew6k4a+763GVw6mO1Uc72xGASqvImu9ek6LfetEeePvA64PIlNbuj9RdAiSHL+Y5dH9fttxFg99v40XCzq1dH8S7vt2Kx7/UXzluFJ+38kG23J69H/bRYcgq2w3Fp5zXCR8uk9bITwppCX931+DyX/sExqD5eq1W8kuqrKfVRlXiivvDidm4t+jjqtnH1ax9yy5xiQZKcbe/my5ZVydGJKcaZx+JZ3jqwh9/mPJWViwU96pgtOXH3H6/UEfSmv4awRbE1Lx89bKK8ohAer2I9F6jx+Ri8gnNdYnZ9a/2q76I1LatMWHZNvuR+or2Tp0NNmzJKkI59Ku4H+bTkk+3tFwKCXYbDbDV1oZqQ9nQZEVA95fI9vjpV0pwJCPzHPeLIXonqW7Kwya2ldmu2RJ7cPdX2/GmK+3OHyMfedY/KIlTJLp0Dw7F9hykqsho7+GJywqrdCLbXGBfvIkS971sLnnU3N34pl5u2WJISO3ABPm7cLqw9rtO6K2UTM34r/zKzcgDQn0UzUOjReSCWWviSqZw6lL2kqQEmmFNy00cgv0WfXy6UptDsWpP2kRJs7fi2PJWfy80oH/+2w9EmXeEn9KpRYyRk/GKqXstXTJ+fZFCQUdnGqpHUyS6dD05cqOQxdxM
hMkU2JIK4o08KGSketZT7FDid5vffxo+VEM/nAtFu25gN92nvP68YxohgcTKR+YLV8ptq/Gt/WmXxFXNRKmcsKStGNrwmXRIXgkK0+5z+3tp/T5b0Linb2cgz7vrMJsB73kglWuoHaXTcbumH4yLxzP23YGt87cgOEfr3P7vnL+f8lh52ljv8fotULonSWHDDFl8WhSJjYev6TY429NqDy1NNDvWoolyY0EqTvHkrKYJNOZzFzlLxyjggNkeRx/P+m/Xkbsw2Uk9j4AHLFabfhw+REcTsosN8WKynM1/dWeAxf0eaLlriv5RRg8XdxWgiB/7Vy4TbRTdUhU0dseVg5L8cKCvYo9NhnT2iMXcc83WzBzzXGcTnVc8RIaYJ4FicEtazj8XlZeIfq9t9rtx/RkQnVWXiGOJGlr2+v/feZ9z+BqYYEyRKIv937jeOueHD5bfRy7y0x53XcuXdFkk1Ku/3At7py1SbHHf3PRwXJf7zuXXm5ipZ+P4+vhigM/OMlZO5gk05nvN0rvj2BWXztoDq8me+N89cJeqe+oma7HVpf48t9rU1ffXKTNKVYsZ9a2fJm2fBvBXIW31xOp6ZyEKb6uuOpxSfKT2iT87cXFF4vTFh/C2iMXkemiol3uLWgiBbqYDF8l1HGf3lQVzxmNOgjGnYV5o1hzRN1WJjd8sk7RZJMeXUi/gp2n08rdZm8RvOwE6kNlBu/1fmelpOfxZrowecZ87yg6J2U/s1aoMa3P3gS8qQvFnwA8PXenR/cbPH2tJpJ89pxxshpc1voyfUuuaLQPySca7TNCRGRUKVl5eGeJ+xW0FU1ffhSbTuivmkHPzqdLS25+vuYEvt+YgINXq57/2OV84uPUhQdk+VkmZ+YKXVw5nJiJL9accHrMiYva6Hf40xYuvJB7vl2vzeuSI0mZpT2gC4vEJJFWHaqcqKy46yMztwADP7g2uGHI9H9L/15gJ257C/l6GnpiFEyS6UyhVT8VFmq8oLValuppX5vDiZmlU6C0pvc7q7waSKAl9pKrRCXMvv37jb8Pllv1JJLDhTT5fqfu+JLVDHJy1ZzbnQTU5D/2u/XcO2ToR9XljRW4++vNXj9ORVkSe7tKmawXEaT8xHcpokK0EQdpW9k+ca/+daDcArhWpGTmYfnBZABAZp5nfZi91bBaaKXbKha0PPzDdrf6RNu71jf5aakQTJLpTE6ZkvdFexO5bcyBI0neN5+nyqT8uv17VHsfpETuUKEIVhJR5fXfbzyFZ3+RZ8Kt0rJlPjGOjTBfXxu18CRfu7q+uQLfOKliV3KQilwTvbeclN47VapsidtMOQGQRLnti424ILHS0x0vLNhX7uvRX8mfhJZDhgfvTSlZ8u3KsijwwXbwAq9htUAjlwLkqV1n0kSHYJcaAwYA4CsHJ3VafTNXGyum9CMpXR+VOxWbjCrhxd/FNgq/5XPpPfiU9MzPu4Q99w6dTDSUezhIsIaGNpBz98/eKjoEw0jOzMOnTiYum7W6VupEStFF9qNmbsALC/ZKSmaKnBxN8ttyMhXdp0nra+WOtBztXT8kpFTeshxgpxfd4cRMtHv1HxxOrJxs2n4qFZ1eX44NGqyMK/Hr9spboqX2hST5MEmmMxWboGp1eqBae8P/3nPB7u0VS13NunXoTm5J0Y3wIH1M+brpU/fHzbtroYPXtZmkXylw2c+HyMxWHkoWHQLJQMtVWIVWG45K2JmQlSc28bQ14TLmbD6NFRK2fWr4n7vUZgP1HCzbi0pJn62Wr9dufqEVFzS4cLvNzuJdRHDlc+edpy8j7UoBdp2pfPypS8X9lY9f1G6PL3sFDq6GoJD8mCTTmTyDTn3LULjybJMCZfh6cJjbTklmyToaHqJnD37HKhki8py77ThSs/OdbrlUyut/H8TpS9IGA4nwxE/OBzFZrTYcSdLGBbdRWrA8M2+X2/c5L8PkXCUcU6nhuhxDUUq8v0y+x5KTzWbD6sPXFkeW7k+0e5yeX
wY5+UVYtLfy/5dWfyZGxiSZzpyVOGFQb274RNnqlMl/7HN9EGGfi6EB0xYdVCkSInPzdPiH2Wh1gq4IIqf7kfYcSsxwfVAFrwmaDr79tHYXMg/Z2bJV1l97zmPNkcoT7tzh76fMdtbPVh/Dor36q8y+5EGrEIk7Yw1NruFaP246LcvjyO34xWzc9+21BcSVh+1XE7/yV/HwEDW2KMrdXuiWzzfYvX3n6TRZn4dcY5JMZ07Y2Y9NrqXlsAeDFPvPO0+SfbshQZ1AFHZDm5qiQyAiGWTneXYSPOWPfQ636+uVtxfq5JqWtwZWJGUYen6hFV+sOa58MC7o6J+1kqMaqSIDgJJ/xktZeXh67k68s+QwHv9xh9CY1GKvN5XZFMjU6iZIo705cyssip29bL96sGTBSK5/D2dyZE7E+TLbqxl8R9E5vfQxItKSiGBtjEAfMn2t6BCIFPXEHG1eoH238RTGaTQ2TylZVbdwD/vjGdHcracxbfGhcrctP+C6r5XZ5OQ77gd0XoHJgp6KDgnAseQsdHx9OX4v09Ny2Ef/Ysk+Yy0K6MEylV9LM2VKeGt1TkdeYfnPuCAHk3Fb1ooAADSKDav0vUtZ8g4kkDupxaIO7WCSTOf8fY3zIzx7OUe2UmEA+Ha9+r019Mxms2HXGeeVZID96TJ6o5X+J662cWiVoz4QctBTpYaRZcu0OvqXnWoti4O/k/qkfubKdfFF2nEmNQdv/F25hcKD329TPZYJP+/GvK3a3OIFAE/+tMvh90IDtLNY/eD32+w2ij9wIQOP/m+HIc7fHPHz0d710EMqv5Y+WnHU66EHNptNs5WdFYcJ2GDDLjvbEBtUK06O2XttfrDsCABOeSXXtPeOQoaQW+j+BVavt1fJeiL+6l9iemvo1fcbT+GnLa5PUv89qv8tPaGB2iwl98bvO89JPjYztwBLvEh0PfLDdo/v64pRh5PQNTYHf1eLUZpbV+RJgvmZn3dLfGy3H1oVP287IzoEzbqY5Xw63bM/79bU++3z8/eKDsGh5Q6mRiakZOOHTadUjsZzfd9bLToESTz5vTTieZ0nbv9yk9PKR2f+2Z+I+pMWISVLmwOaVh8uf/2RW2DFxyvdm+pZUnEt12lAUob2poCSPJgkI0VkeTiqdqOBxj7rzeaT/LcXZeL8PZj4m3cXCE+7MQ1q7hbtXlhOX35UdAhkcJ5+PmldTFig2/f5a7e0bZRazSv+99c9qj6fRav7kOxwFeuBC9Ib+7NNTmUZuQUY/dVm0WEQVeLJQtD5tCt4WMEFUKNSs4fYXAmFDCQfJslItyo2cHRmeGs2apdLhpMLTK2O4Na6uVvVTVqlXZG3J4M7bDYbVh5KqtRbogS3dZE7MmSeLKVnSuZv5NgGnXBJW1u9Vh5KcnsCZIOYUIWi0TYjtfaQy8fLj+KcG+c83lRvk2tJGdqqfjqWLK6dhifv1l/9a5wWNY62UloVWO05pWL7ln/YL1JV/NTTkYuZlT8Ahn70r+zP4+mJts1mU3UbS4Yb+8njokMUjMRc3l162OH39p93f+S82X29zjgnJlLsOH0Z98/eht92SN8eSmKoMT7dW2dSPT9B9aTySsuOJbs3Zc+dxJcc/RO/WHPC68ewx9ME3v2zt+Heb7a4dZ9Af542UzE5p82zWbc83Fk8V1JuQREGfiBuMFObV/5xe1DDlQLjVFin57i3EJycmYs9Z9M8ei41X7srDyWr9lzEJJmufLKy8jakIqsN20+lyvo80aEBHt3v2V92o++7q7x6bnfOdS9qdM88kTumLjRX77yLmcUnL3vPuR4SYVbfaCRx+ukq7W99dWdlNTkz163efXrzkZtblQ8nqVvpkK9Q/ytvKki0Vn0iJ18ZSwvzZRyqJDe1egJtOJai2GMXFml0P7POaKXf5PPz1d0Gbj8G91p4FBjgd/BydvH5pd/Vyte/JU5lHvfjDtz06XoAQGp2vlvX1UpXs3OnuzhMkumIo0aKZy9rY4vbb
zvO4YyXsWw4fknyCY/FxVtHXHSwV7GQ+wL9+JZCzqVf3eo5Z/NpvPz7Pre2q5jFaxpJnO46k+bV/QtUuLD+e88FbDgu7eL1zb8P4ul5u5Bdpqnx8YtZOGDSCti8Auk/nzpVtPt5ahMy/kH7QgKcNzLPynNcOVKx8fclDS9KDvpQnYqdu2ToP3bionvVnmZVr6pnuz+cvRPYbDavKo/dcVwDP+f0KwXYmpCKyX/sw7qjrj8ja0YGqRCVsi5frSArqSicI7GH19aEy6V/nzh/D275fKPk58wrtCqayOKnmzi8otURRytNP26q/CYg9aJBi95cVHkkOSnDZrPhqbk7sfnEJVlWkcKDtDMK3RmtTmrzlh5e9xFB/qV//2HTKfR8ayUA4NSlbE1diLHXFRAb7t1Js1qvs03HpQ0dSbjaO6TH1d+5lKw83Pr5Bgz7WP62BSIo2ZPs1o51vH4MZ0kZb+S6kewjaVYdKj9F7rRKyQV3JWfmOuw/pDSrB29w7y87okAkxuNpn6fULMfb7P7YdR6931nl9rZ0PRs1cyO+33gKd3+9GUv2Oe6Jl36lAJ+4OSVSq/7Ydc5h65edZRb+HLXs2Xj1fEJq/7KLmXlMZBkUk2Q64qhKa0tC5bLQk170SvC2r9TFzDykZnveGFyuFZgzqVcwjQk3p2y24hOHV/86gGQ3tixs0vkU0shgf9cH6dBds7Q/aSvYTnXD5ex8XPfuatz/3TYBEdnnTpWNN37eegZ/77nWO0StlW4p/Hy9y7o4Gs4gN6kj4O01jr/MXkCShAV6twCSW1CkWNVotkLJN727deZGj/9tCq3l3//eXHRIjpBkp1YVWUU/bTmN1Ycvuj6wAh8dTUfVI2dVpTtOF1cLqbEYt++c9qqTH/2f48mVa464/7usRTn5RXhq7q7SrwuKbOV6Vq45fK2n1/ebTgEAdp6+VkUGAJlX3zOlfl65c+1E+sIkmY5k5alzMu/tiuHbSw4hTYWVvV+3n3V5zBdrlWkUbDRXCorgTiuH0ypOc1HC+fTyH36fGmQFTSqRk8qy8yonTtpPXQYA2O3l9j65pbnZ/NUT/52/B+Pm7Cj9+sXf9yn+nFL9vO2sV9OgMp1MwhXB20QPeS5PoX5k5Nx3GxM8up+vj5hkjrs9pdRueP/TltMostrc7v9XItTFFlgCErxY5Hf263Pp6uJ9SAA/B8pafywFT/60U3QYsrC3KPBAmcXXsgtqJT0yx3x9bXjL3rPXeuWuOiytSb6o90pSHpNkOjKmWz3RIUhyOjVHlQkzWxKkVzOtFbBKoofJcGUF+3t/8qaXCU0VT5Jmb0gQE4ggZbc8esrTBrllE0Ja1vmN5Wj32jL8ufs8MnILVFstlLp1UC1mG5Ci50buKVn5GPO19qtJST3vLHE8jdqZmpFietCp1YTfU5N+24s3/j6IRJnj9DSZaSQl1UzebMu+mOn4/bvkHDeIE2rLGS1Drz2tsNip1HQ1EbLs79ufu8sP9ikosuKLNcedtt8QuehMyuJPVkeiQjybOump9JwC2Gw2t3vzbDmZqkpmPSFFejXTgQvqlz6vPpyMw4nqTg/zihs/Mkf/nmo06pbD1oRUVaqEjMxZ6b4jmTrs8/Xt+pNo88o/uF6lbT1amyLnzTu5ms2L5epLpPdV4X8lNGguIXWl3EjOpOYoNqXQSDvp9PD/sk/QhOSKF9LucNTHTMrOCKO795stsNlsLpMazuj87VtxRj/vzc73rnr9uw2nSv+ecaUA2xIuY9riQ5i35Yy3oZEOMUlmECVlo2dSc2RpOJ2SlYe2r/2D277YiDav/ON2j7GZa457/NyuplaWkLrapEZVmz2P/bgDg6eL6ZdRlrPtUqkVJsFIpffKq7ScArywwL3x2HpxJEmdxOyyA0mY8sc+POhGH7FkJ6u8WrXzdBqAykmY5IxcnLiYhe2nUvHlWs/f74xs0V7HjYLl9udu16Pevdk6qiftXvunXB8WR6Z7uGVMzx77c
bssUwrtMdJAGFHN8N1xwyfrRIfgtp+3ncWMVeZq7+CO1Ox8fODFcANnfaTOXi5eWNdztbC3SobWlFCrb6haxn671av7l12kzM4vKl34f3Oxdvpbm+U8RguYJDOIJi8txk9bTqP3O6vwmAcVHhXd+vkGANfG4u46c9nZ4ZV407QyOlTeirlPVprvQqAsZxNrSlY1wwL9ShOt3nhyrn76Gizam4jvNybgkxXG+v1w1cg4t6AIry08IMtzfbfxFJYfTCp32+lLOQ63GhtlkXfqwgPo8uYK9H9/De6fvU2zTa3lUOjhCdldszbhJ4nj1+XwoYQLq/Pp2t7KJZe0nAI8+r/t+Gj5USzZd8FuD0lPtkvvO5eOHC9X6kWy2Wyl5yZGuziU29hvt2L7KffO++QgalHTHSlOJihK8bHBzjnk1PPtla4PcsLRDpNLWXnYdKJ4yNndX2/G/vPeVyGm5eSXDgPQi5wK52ZnHQyEM4rejWM8vu/nq68tfmppAaTve6tFh2AaTJIZyDtLii/U1h+7hLcXe3fRllDhpPr+2dswS6Um+HJPWZmxytxVHnO3ur5QzcgtQINqoV4/V65KEwHlMvmP/aYbyb7sQJLrg9z01b8nSlfc+ry7Ci8atEpv0vw9AICv150sva2k4mLR3gt276N3Az9Y49H9NqjcW82bicoAMOm3PfraHu/C0v1J+HD5ETz6vx0YO3tLpe9vdnNCcZHVhhs+WYc3/nZvRT05I1eWBRhnbvhknaTkXdm84Nojymy51KIUCX0F7S1gLNzjujpTbl+aYNgSh1g45u055IfL7Z/PTa2wMHjr5xu9eh4AeHHBPtz82YZKt69XaDu3XLacTC39u6cDKPTC1YRKZ/2qwyUO+nHWB08Jp1NzdNm6RI+YJNORr/51fvJQdpR9hgJTxd5YVHxy/OPmU/huQ4LHjbul0NvqjDPP/rxL6PNfkFA9ERLgp7lJdOQ5Z4lmJfrGvf73QWwskxT520HCSA9beJz5aavjvhSP/yjPQIJO9arI8jhyqbjyrGXOpu5eSHd+svzTljN4/W95Kiy15vjFbPyx61ofpeSMXMxycT5R0bSrC28Ldkrvx3TqUja6vLkCd6swTOCci4qIe7/ZUq6C56HvpW8T17tOry93eYxWCiUulUl2bz91GRuOazvh4KmyCy2kvEMVFkCueFGxmF9oxfIDSQ7Pc7xpN6OG277YiPiJfyN+4t+S2hTo2YmLziel3vNN5QWkEg/0ri9pgUFEL+bjLv6/SB5MkunIkST1miA70u7Vf/Dign2Y8uf+SquMATJO+HhvqWcTmbRo/g7Pm7zKoUZEULmv7SU38wutblcYnUmVPjiB1HWvkw9+peQVWrHnbJrTY97/R/9Ve285qdL99+hFr7ejRYV4P3lUbufSrmDUzA12x6trSWae4yRs92mut/Go1c9PhKfm7sKfu89j7ZGLmLb4EFYd9qxi252kackq/paTqVh5SP4K1rLsTTUra82Ri/jIwTY3PWzxk0vZ1/DIdrUk3UfE4obNZsMtn2/AXbMcJ1j13KS9bGWTlP6BJN2czafLTaMuLLJWSpIBnlV8Ldh5Fq1eWYoHyyTZV1cYgKKnhSVybNeZNEz4ebfL44KuTk2Vg9S3tC80nog1CibJyC1pZU6Wnpq7C+vKTNLy85XvjEXNqWhqOJkiLuvvYyk+Cft4xVGMn7MDDV9YVKkfiycjt3u/s0quEMkApi8/gps+Xe/0mHUa34YghbNV4jFfb0GLyUsdXlRuOnEJd365CYUam2DpyosL9mJrwmU8+ZO2ew56u+XS6A2dn/xpJ+75ZotX0+MA4Llfdku6sC978fDMPNcXG97xPNHQ7OUlko+dv13sopcjrs6+LqRfwarDyWg5ZWlpMrhOlRD4Ozlv+3Z9AgqKrLJXSjiLtWQRT8pnRaCffBenIr30+z7RIRjKCwv2osubKwAU/z41enGx3eNGf7UZWxNSccKN641n5u2utH38v
m+3ljuHFtHLj+S32s5CUkJKdrmf9e4zaV5VJVYk9VNs8T71hiKZGZNk5JW7v96M7acuw2azybp6UnKxYrPZkGiAZssHzmfgg2VHkJSRi+2nUl3fQUbn03PR9OUl+GDZESzcU1we/vee8mXiet8GZ3QuiiQkSc3Ox6rDydh8Qpnfv/3nrw3ryCu0YpOdnkc1I4Mq3WZEwz761+7tz/68GxtPXLJbFWy12mC12rD8oHcJDCWUnCyuOJTsdSJKSWO+vlZBmZyZ67IfiVl5+37yy/az2HPWdeNrNSsP1UhwZuUVStp+I4Kri6vu01aWTn4bP6d4a3jJdGtnPJlu7oqzWEsSuGUHcThq7dE4NkzOsFRXUl3542b1BpyYTclES0dGzdxY7nPDGWcVp62mLGVyzAT6vrcad83aVPr1bV9439vOUxw+ozxpXemInLjl8w34c3xP2R/32Z93o3pEID5ffRxfjOmI5IxcjOkeL/vzqGHc1ZPSkp4ovz3eA/d8vQV/P9kL9ap63zDflYorXxN+3o2qYYFo4uVJZlZeIcIkNrckz3myG2Pi/D2Ye7WHVts6kdgt4aJWTnfO2oST04aXu01KfzwjOJd2BYVFVvhV2IJekrTZfPISWtSKKPe9h3/YjueHNFUtRk91mLoMCW8Nd32gIO8uPYQb2tTCuDk7kJqdj071qriVeHxUhunQWpeW4/2iyDfrT+KjO9o7PeaLNeo1YV9/LAU9G3k+yUwKq0G2xR1JysJD32+T1GLhSkGRR58/FrhX21f2+PiJf6Ne1ZDS7+XkFyI8qPI29MsSknxadv/sbVj3fD9hz+/uz0hv4if+Lek4KYspH684ig9cDHm65fMN2PriQEnPSfpT8vMvu0AkcgjHoz9sx5ju9dCvaXWX7QbIM6wk04lTl7TdpM/VNitPzN9xtnQE7yM/bMfLfxT3QZux6pjkx9Dq28bNn21AVl4hBry/BnMErSLe+80WSX16nGk1ZSmsEgY4aPXnYGRzyzSZVztBBhQn9pIzzZEUs2ebhFXlgxeuVd8tP5iEmSomFbwRP/FvyVvu1DZj1XEM/ehfnLiYjbScAk1W5hnBH7tcN3zuHB+tQiTFPlvtXY8WKdPC9gl4H1WKOz1Ilx1wf2tPyTuD1M/+iu8kp8oM4Wj9yj+In/g30iokxc6k6r9StNfb4tpWePrubcTzufiJfzscpmC12lwmyEoYaegYlfdxhZ6WVwT3nlt1+CLun70NX/3LISBKUSVJNmPGDMTHxyMoKAhdu3bFli3OS1t/+eUXNGvWDEFBQWjdujUWLVqkRpiaZm9vtBmNn7MT7y49LHkykPYu4cortNrwwoK9+HnrGTzw3VbR4Xik2eQl2H7qstO+Zlr/OajFiCeXznR5YwXu+WYLLml0i5KS7vhyU6XGwL5XO02/+tcB5BUWYWiZbZlNYsOcTiXVml+2n8XUhcUTj9cdTcGstfpI8JF8ftiYgMV7LyB+4t/YcjIVW06mlmtInl8k/0WEs/fQSb/twa0zN5RLPgPStqW8vvAgLl/dVngmNQfztp6u9Hr8x83hNiIo8RnznhcDV+T87D94oXzzdT037tczo57PTV14AJ+uPFq68yIjtwD/Hr2Ib9ZLT0I88oPxK5Gp2JQ/tdFL8I1FBzFGhQnSZmSxKbwUPG/ePNxzzz2YOXMmunbtiunTp+OXX37B4cOHUb169UrHb9iwAX369MG0adNwww03YM6cOXj77bexY8cOtGrVyuXzZWRkIDIyEunp6YiIiHB5vF58tyEBU/7cLzoMIpKRxeLZVko96lSviqTqKqNtAZn7cDd0a1AVADD6q01Yf6y4V1u3BtHYpFB/ODU1rBZaOo687DZMqVtdyHiGt66JGpFBkhezlLDlhQGoHhGE7acuo6DIiju+3OT6TgD+Gt8LD/+wrXRr+MIneqFV7UgAwM/bzuC/v+5RLGZy7t1b22BUp7jSr1tOXoJsDU0SNNpnFxHZFxnsr7lez
nd3rYvQQD+M6V4Pv24/i+nLj+LYG0Mrtf0g6bkixZNkXbt2RefOnfHpp58CAKxWK+Li4vDEE09g4sSJlY6//fbbkZ2djYULF5be1q1bN7Rr1w4zZ850+XxGTZJ9u/4kXv3rgOsDiYhIU6aOaIkx3eNx55ebsNHOQAMiKk9LCYdG1cPw2oiWOJt6Bf+dzyQZEemLmRZkqbwu9aNxU9taiA4NQOf4aLy1+BBy8gsx/Y52sFqB4ABfWK02+PhYkJNfiJz8IsSEBZbeZkRSc0WKdtzOz8/H9u3bMWnSpNLbfHx8MHDgQGzcaH8ixMaNGzFhwoRytw0ePBi///673ePz8vKQl3dtK09GRobd4/Ru95k00SEQEZEHXv5jP17+g5XARFJp6XruWHIW7prF7Sxq0NPFvJYSuUTO6OU1RfIraYVQ0eKXlkh+jAbVQrHy2b4yRqUPitbgpaSkoKioCLGxseVuj42NRWKi/UagiYmJbh0/bdo0REZGlv6Ji4uze5yepWTl4XcJTXKJiIiIiPRITxfzOgqViMhjJy5mSxpuYzS636g6adIkpKenl/45c+aM6zvpTJWQANEhEBG5ZMzCbPPgFHEiIiIintOWFezvKzoE1Sm63TImJga+vr5ISio/ESgpKQk1atSwe58aNWq4dXxgYCACAwPlCVijfH0seOvm1pj4217RoRAZCrdLyIv/lo7NuKsDxs3ZIToMp/RUxUEkCj83iEjL+B4lDzP8Gw5sXh0hAX6w2mxYuOcCAODubnURHRqIO7vEISTAD5HB/oKjFEPRJFlAQAA6duyIFStWYOTIkQCKG/evWLEC48ePt3uf7t27Y8WKFXj66adLb1u2bBm6d++uZKial3d1JDERyccMH4B6Y7STuwBfH2x5cQCiQgLw7XppEz71asWz1+HghQz8tOV06RRPIhHCAv1wS4fa+G7jKYQE+CLHwymILw1vjvt6xMPP1weL9l7A4z/uMNT7k958f38X9GhYFanZ+ejy5grR4ZCJaflcRatxkfLa1InEnrPpAIBXbmyBO7rURVCZKrD8QiuKrDYEB1SuDPv0LtXC1AXFt1tOmDABs2bNwnfffYeDBw/iscceQ3Z2NsaOHQsAuOeee8o19n/qqaewZMkSvP/++zh06BBeeeUVbNu2zWFSjczJaNuCakUGiQ7BKw2rheLm9rVFh0EaNeOuDtj36mBJxxrp5K5XoxgceWMooq5umc8uc6H+8yPGWPg5OW0YbmhTE1Eh/mhYLQw3tKmFHx/sJjosUll4oB/u6xEPADj2xlCceHMYutaPFhLLc4ObYt+rg/HqiFZIeGs4drx8vaT73dklDglvDceUG1sAAOrHhGJM93rw8y0+VT6alKVYzOTaz490R58m1eDn64PqEUEI8td9xxjSoK/u6YQ/xvV0eV5upHMVcs/JacNEh1DJxkn9sf2lgfhzfC/snjwIX93TCff1rF8uQQYAAX4+dhNkVJmilWQAcPvtt+PixYuYPHkyEhMT0a5dOyxZsqS0Of/p06fh43Ptg65Hjx6YM2cOXnrpJbzwwgto3Lgxfv/9d7Rq1UrpUDUtNkLfSRS5RIcGIDU7H4emDkFTNyZzaFnNyCAsm3AdCoqsaPfaMtHhuG3X5OtLkwC/7TwnOBrSkshgf2yY2B+hgYp/1GjOY30b4vkhzcrddvBC8fTlEe1qoUv9aDw1oDE+WnEUANC+bhTCAv3w79EU1WP11OHXh8BiseDjO9qz2tmEEt4aDgCw2WywWCwostrw1IDGpUmlEe1qY7OdqVpK2fHy9cgrLEJsePnzJX9f18mUBY/3QPu6VQAAY3vWx9ie9Ssd4+drsNU5nQkPKv85klvA9xyS16GpQ0qTChsmDQAAXM7OR/up0s7Nx/aMx7frE5QKjxxQu6rPYrFgydO9MWT6vyo+q2Nrn+uHmpHBpV9HhvhjYItYJ/cgKVRZhhk/fjxOnTqFvLw8bN68GV27di393urVqzF79uxyx
48aNQqHDx9GXl4e9u3bh2HDtJexVdv1Gv9lH9evoaKPP7RVDVQLD8SmSQOw79XBCPTTdxb89ZHFSd/XRrTE30/2RmigX2miSU1/jOvp1f0/uK2tkLhL8JJF23ZPGWTKBBmA0qoae9rFRQEAnrm+SeltO0+nYVQnfUxn/nZsZxx47dr7sI+PRVMrk81qhIsOwSt6e1+zXC3t9vWxoErotc+DhXvUm8pdNTQA0aEBqBkZDB+f8v+Cvj6u/0VLEmTO3N5Z269PpX5vFj/VW6FHluaPcT0x/7HuaF4zQmgcShjTrZ5X99fbe4VW/fJo93IJsrKqhAYgwE/a5fKdXerKHRpJIKKqr1kNbbwf7Xz5etStGiI6DEMy59WLDkk5yRNlcMtYjOvXCDNWHZf1cRPeGo6LmXl45a/9eH9U29IPL6kfVlpyZ5c4/LTlDJ69vgkSLmXj1o51EBroi5va1hbys31peHM82LsBkjNzvXqcG9vWkikiz7Dc3bFDU4dg77l0NIkNB2zAe/8cxv82n0Kgn48qK/CP9GlQ6baakUG4kO7d75xeOKv+rRpWedjM30/2QnzVUCVDksXkG1qgX9PqosNw6OS0YbBYLPh1+1lkXClAr8YxyMorxM2fbZB0/xva1CxtXiuKHt7Xvr+/i8tjBjaPxYbj6vSm+2x0B8WfQ+vnHu783ux4+Xp0mLoMfj4WFFqd31Pt9hbzH+uBF37bi8NJmQCAFrUi7FYDlu29o0fd6kdj6shW+GHTKY8fQw/vFaKsea4vvt94Cl+vO+ny2M7xzreGr32uHx7/cTt2nE5zeEzd6BA0qhbmbpikE8ffHIaGLywSHUapFc9eh4b8fVOUtj/xSRdm3NUBIQF+CFGgkqBaeCBm3NXB7uqOntzZpS6OvTEUTwxojPdva4cgf1/8X/s6qiXIFjzeo9zX/ZvJc5ErZRsLiRHk74vO8dGIDPZHZIg/po5shZPThuPN/2st+TG8uTgqWyVVIuNKgecPqCNfjulo9/aHryYOB9h5/bWsFYnQQD/0bVpN0dg84Xf1fapKiD/u71V5G5pW3NyhdmlV060d6+D+XvXRJDYcHSRUCZW4oU35xL+c79BaWeqSI9nTvm6Uy2PaSThGLmoksIJ1fB5ye6c4NIgpTsJ/NroDokMDML5fI5fv8R/c1hbhQcpNNqv49FVC/NGxXhUMaH7tPdLPwXnSFQ+HMWjFnIeKezdGBOmjXkFvvYDrVQ3FkwMaOz1meOuamHxDC5ePVSMyCL897njnxTMDm2Dls9dVqmIl4/D1seDubnUxe2zn0tse66vsLipnmCBTHq9wyWOju9ZFwlvDS/uPyKlTPekXNd5S4yOtSWy4Iv9OUtSKDEL7ulWQ8NZwrHz2Onw5piMaVHhzrRrq/pbJCXaSIGQsNjeWqT+6ox1uulpZGOjnYzexbYZV79ljO2NQyxp2vzfh+ib4+8leTregOrogFOn929oCKF4Q0bIHZE7gyd3nRPTvf3iQH3o3jkHLWt5tE0l4a7jbiZOoEGVHyHuTyHlpeHNJx/n7+uCR6ypXyOrB27e2wYJxPfGfQU0wsHlx+44imw0FRY5/K9vXjcLNHeogUMEEZMVn73S1oueR665dfFocZGeOJut7kEJJQuXnR/UxxMWd8wGR/H0t+PSu9gCK+6K+N6qt3eMig/0xY3QHtxZ+2taJrHRb3egQPDXwWi9GvQ/iIsdeH9kafctU0j8/pJmuF0/IOSbJyC1/je9V+vcn+pdfofF0xLo93RtWle2xXFHjc19kJVxSZl7p3xtUC7N7AR8R7P4FhqsVOhKnpOeVmkID/PDxne2dHvPScNcrtlrnbKpRwlvDy51AVRTk74uWtSqfZGvdiHa1sePl69GjUYzoUDz2ZP9GLo9pXbv8z0Yn14SSdK0fjb2vDMYPD3TFY9eps/pdttKn7LmDKPVj7G9nfrC3PhNf7ooM9sf4/o1Lq+5+2nLa6fElnyNRHpwfeCsy2B9DW
tVAewGfZWoo219NK72NjOLoG8PKVQTf2rGO3T6V214a6PZjL3i8J/73QNdyt639b79yX1fnoDVDeGZgE4xs57qlTKFV/QEi397X2fVB5DUmyXTkmYFiK3dqRwWjdZ1IHH9zGBLeGo4aCq6WGCkBs/TpPkKfP1TiNlhnjcZJDE9riuY+3M3h9wqK5P9AjwrxR9MyJ6HVwiv33AKAbg2c9/3QA0dVDXIlJpcfTJblceQW7UG1qdoaVXe8/WDCoKYu739j25pyhqMpPz547cJuUMsamHm3/S3BjjSNLX5926ukcKRbg6poXjMC8x/r4dFCjDtcDY747fEe5S4srm+u7WFIchrbM97u7Wk50ra/q1kFX7aQdubdHbHAy+FCWvXDA657+pF87E2m9aRdiI+PBb0axyDhreGIiw62e0yfJtprmVBW5/gqWPhEL/xWoQ2LGTn7WR1JysRgB7sCynJWjauUemzUrwomyXSkZpTzpNStHeuU/v2N/2sl+/P/9UTxSrDSfbQaVAuVtddVkL/YX/OmgietSalcycwtwIELGSpEoz3+dk6etMLTj15nlYuuGtR6YtfkQYiLLv7QHt+vET68vZ3sz6EFyycUJ7zD7GyX/N2gF3NTR8r/WaIUb6YeH3htMB4yYEVRtfBAvD6yVaVER0c3Wxrc2rEObutUB1Nuain5Pr4+Fix+qrfbz+Wuz0Z3QO0o+xesJTrUrYK60dcuLD65y3nVq5FMuVH6z6wsV/+mShjRrrbqz6m2AF8fxNgZ3kLec9Q65NkKiyRyLMS/MbK13Uoje0OLtOSXR3ugVe1IdKhbBXMe6ur6DjpSpcK2/t6NnVe/f31vJ4ff+3vvBUnT2UW8T1ZsmUPK0Ee3SHJp6shWGNwiFvVjQtGiVgTOp13x6vG+vrcTHvhuW+nXa57rq1olQU2ZK9QWPtEbAz9YI+tj6smbN7tu1B4p0yr/A73qS5okpAX+vhYcmjoUFgANNDSxxlvPDXZeMdOgWhieG9wU7y497PVzRQb7I71CM/7/uHh+PWtUvTjhveo/ffHDxgT4+liQlVeI+TvOCY5MOYNbeFZx8+uj3fHRiqP492iKzBHZd2/3ei6PCQv0Q1Zeod3vhQQY63Ro46T+qBnp+OTdUbWnIxYL8M6t9nv7iFY2+eVM2abaARw641TDaqG4u5vr15TcRLQKUFtrN6oxzabitYe73nAwmKjsROa46GBZeur2aVLNbiWSlMSKVjirvtajyxWqY89ddn4t7Kwgo5dG20ssfEJ86wKz4FmCQYzpVg/VI4Iwrl+jch8GnurXtDqeGdgEC5/ohWevb4K4Ku6Vdr59i/QJehWlZOZ7fF97RH0I9GtazWXCQg2O+rAAQMTVZsfulgu3qm2/h0a3Bur1kvPWN/d1hq+PRXPTiLyNZlw/172X5Go0+suj3StNTnX6vApMwBWhWnggJgxqiqcGNsELw5pjw8T+okPSnE7x0aq+H9wkoXeI0s3jtfJOsv2lgU4TZGb2zq1tMLx1Tc2972vNu6Paan6q+M3txVSdxYR5vmAcFeKP6QattJZDv6bVUaeK5+9dzna6lOy2+eZe8/ZzmnJj+b6w1cON1T+tYhXfiZRsjx+rU/y1Cuj/E/ReY0+r2kyyq4VJMrLLx8eCpwY2RqvakXhiQGO3Tii71I/2aiXF30/ac2l9gswHt7WTlLAQqWQ13d0ttGMcrDAr0e9KCYNaxKJ3Y232jdBLs/CfH+mOJrHhaF9X+lYqPZ6QPdS7Pmbd0wmvjbC/ZclisWj+YtJBGzVJvPl9LJl2qoaO9eTZRpxX6PkAGq28dqtK3MpVsQG1GdzWKQ4zRmt7SitJUzJ1V23eJKCHtKxR2pqAymtWIxw+Pha3+yWWpYfemSKN7SnvBGitGeBlr8mhra71IIsODUDD6mGICvbH3d3qehsa6RCTZDqyZF+i6BAksUCdZEm8kwqpih5VaZpXWUpULSi59p2aLb2Cz+IgErm3yiolJ
Suv3NdmO7GyyjDLvUt995MSvj4WIa9FT3w7tjM6x1fB+H6NcX2LWNzTPV6V55W6dcwd3vy4vVnwULp/pRLc3YaoNe5UdoYH6WdbkFEp/Qqp5mHvq8xc+9uSlRbmxu+koyEqSmpRMwLv3NpG9sf96h7HvZHMQo5tZIF+ri9rtbKYoRUjJFRhm0XPClssa0cFY9eUQU4X4YpkOJ8mbWKSTEeOX8wSHYIkD/dpoEpT0kc0frGtxAmckm/Fjd3YlqrHi9+ymlYYuf6NycYpX6yQJFSTvQmXHwiqCHCmVa1I/PJoD0QqvEWvde3Icn1Nnh6oncm+t3SoY3dIgVT2JoqJVLF/nj167lUVExbgVmWnmgTkM3RB6curhU96lnhIy5G37YVUJS0gtOq3x3ugec0I3NBG3km4sRH6WGBUUslwEW9apDhL/OcXFi/eX3ZjQdgMPrqjvSyDDLTA3qXJ/MeuLRx1sjNEpuzC7ahO1wbgSW0zlCHhvIL0Sb9ngybkqErH3nQ+b6ZDli039cSA5rFercb7Sjybru7iOQL9fLB78iCP4zCT/s2q48zlHMnHD/byd0S09CvlT5K86YGhJXd0jpN0nJ/AJKe9i8L/a18bzw1uihl3ebcNSo/X4b8+1r3chKWbO9RxcrS6vM0XRQWrU6E5uKW0LRZ13OytaUZKvYaUTH74MANn15P9G8mWfLF3cakFn6o8pbRka/3kG1q4ONK+C+m5coZjSEq1LyjZfaJGBaKjvr0iOZsyPKCZ972stSA6NAChZXrf+vtayv1/392tXun5b/erPVMnDm1W+v2yE7K7N5TWU7WqFz0KSduYJNMRR1vC/jOocnN4bxNd3pKa6LLncYl9vFxVuFaPCFS8CkTvfHwsWPd8Pzw/pJlb27wcVZcUWvVRdlykkzjdNflGz07c1XQpq3yCctrNrWGxWDCuXyMM93J1Xs6fqqNR8nIL9PP1alFDScmZ4ioO3dG1vrST2SD/4n/n+3rEAwBiwgLxwrBmeLyvtquS1eToNbTx+CWvH7tpbLjXj2EPL1Lsszd5T6omFX5Wj2n0NXJDGzFbxapHBKG/B4mF6z2cFkzSOKt8vrtrXdzXIx5t48zT+Dw2oriYoH+z6vjxQcc9KNvGReEeCROi3SVi+WL/a0PQspb9JOWglrGl1yk9HCTBalxdWJCarG1RU3sJUZIHG1IYgL3V8XABJeuBfj7IK/S+F9kgmU4izqQ6H/1LxeSsrkjLYdmxSCEB+npLP/L6UARI6CGiNAsqJwdETr+rGx2C06nSKzuVIqW/izP2qpyVcLvECspxfRvh7SWH8ET/Rpi9IQEA8HAfbV78e0KOz19H1h69qNhje0svizNqczW1OiTAFzn59gdVNNfRhd9rI1pi8h/7FX+e2ztJe59xZnRXNgCXIirE36PzSWeJjeoRQXjlJvsDeOTm6HWlto0TByAlOw9RwQFCzrXUfmcuaQVTUl1cv0Lv6qAylWKOYpt2c2v8vuuc5Oe8nFMAi8W73q+kTeKvTkiyWxxsw+nbVN4pfVcKPHtz//XRHvjhgS5ePXfPRlUll0KHBGh7ohyRFJ42VtarmKtVH08PbKyJBBmgvUa+vz7aXXQIAIAn+nvXp0SNbS3vjWorebjAwBaxWDbhOvj5aOP3Tm7dGkirqCvhzvAbVwkXkaqwYtyuXBeTWt3ZpqpGn1lPqTVU5a1bWnv9GI7eE9n8uzy9L7h+cqe624DtGdQiFj4+FlQPD5J0rqWX6fTOlOyIKakofNbOTit7Pr2rfWkCu1+z6vjoDuk/P7V2HZD6jHmmaFD2RttOv72dV9PH7MnO82yqUes6kejdWN6EnTMRwdJPjAsN8OavFc5WUyM4Lc1tGyb1Fx2CqtrWiUL3BlUxol1t0aFoVnWNNHFuVVv721JiuNWu1Nir20ilinTjM1SOCwFPJuJK4WkFbf9m1fHfIdIuoogqJrjkLDbm+5g8tLJ43rJWpMd96+QwumtdfKyBR
J3aSl6jGbnFSdYqIdJeVze0qYU3/s+zJHhsRJBqVWStdXBOZiRMkulcCwf7rr1h0Un7a0c92uz5ZftZBSMxl2Y1HfeV6RyvzEWQkWm1H5VSqoQG4KeHu1Uqgy8hx+RUfbyDkRy82S6eInDKqyIU/MWXI3ErdVusWr65rzMe7yutB2qJsyZt48BKp8rc7dPWz8muD71PDNeC2IhAVaqXpbq/V31hzz1xaDO3ByD0aqRekYPSGlSzPyFViV8PNQd/Oeq1Rsow19UZqcbTD3ylJmClc0SvS1L/7Z1NRhTZx0nP/v1vPzx7fROvHsNR0smegXaqUrVi06QBXj8GL+fIGX8/Y75PZeZ6VgUuhVb/xdSeTFxSoaALLt4I3TlPK9TwdltROtaLxju3tJF8fNnJeURaM7xNTdyrQPN+QDufH0okUfNV3Kk0wcvrBHIPk2SkCKklrmU93KcBnhss39aHtnVYluqOJwY0lrTKopWtYN64nK2tC5246BBUC/eu58s/z/SRfGz7ulUwpKXnE3CVHBcexf5CpDC9DbiQypPG/b893kPScRoq0Chn7XP9RIegWa62nr00vLlKkUjjbDKhaCWTcSsa1amOV5+laps6Qp3G9d7yZOgLE7nFBrWI9fgzbsKgpnhVgeEGRv7JxFeVvkDtLSNcf+kJk2SkGS8Ma+6wRNYTs+7pJNtjmUHtqGA8P6SZy+O0XIUkVYNq6n2oqUXNbZtfKvjaMtv2UyKROtStIuk4rY65Z/WyY64ulEd1isOYbpUrR3o0dG8AhBxeGNYM218eqPrzSvX1vZ3t3m6xWLxe4JLTbZ3qoHfjGLvfCw/0wxiVhhx4y52hEiWy85WrpPWU3IPVpPhiTEePd/NEBvvj3h7xmvqdLqviALtgf198d3/lgXH5V4eW2OxsEx/RrhYA7fSvI+3i1YjOcRuhY8y4u0/KlDAj9M7QyoXV/Me0McXQXUb4HSDnQmU6gfz3v9qt9DFCwl8tL2qs6ojk8WDvyn2T5jzUTfU4YsICNb0dsWcj+4knACi0aqdOpmejGPzwQNdKAykOvz4Ee18dLCgq85o9tnICR0n7Xh2sqb5scgsLLP8ecaWgCEXWyhXUS/cnAQCOXcyq9L3rmhQnLgM1Ml2dtIu/ITpXpKEPZ9I/VyvPWt4O4Y7lB5JEhwCguKcJkZHFRXveWF8KT/O1Gyf1x8d3tpM1FtGUbDEQ5UELBdK+elVDcfSNoaLDQIGOJ5Cr3RfPmfyrW64f79uodCty/2bVNZ2AlAuvh5z3DHbHJY0OtamYAKwV6bwYIievSMlwAOj7vYucY5JMZ/o0Mc70kbKUbkb4cJ8Gij6+UVzfwnllxe/jpPWv0brkTG2eABCRe9yZclxWzchgw/Umq6dibxSz0lOVRs0oadX0Zbe411M4qe2I0sl0b7iqnB7VsY7T70uhRIKnbtUQHHhtMGbe3VH2x9aiAvYkc3uipSNd66u/5VqK+jGh6FWmqjPWQZLs0euKJ8+GByn/GV8zUt4kudZ6RZoZk2Q6Y29/tRGMbFdb0ccf0ko/jVVFcvUBWzeaF2FEavhyjDkubIhIGTFh7vUVCg/0w6KneisUjWONq4dp9qIcACYNdd6rtXpEEB6ys23VHXZ2jMkiJMAPATrcVnZH5zi371MrSjsVfSKEBspXLTjlphayPZacQgP98L8Hu5Z+/VBv+wUQ8VWLk+46WtMoNaRVDQTbuRZ7+5bWAqIxN/29c5qcuyc9Rid1Ep7U5sRG89Ed7USHQBLpJf39+shWokMwhUEta3g04YuIyBP+fj4IFdBS4Y4udTXb59LPx4IHHVyIl6WVgTONqss3/EqkyTfqYwqnFAskThD21o6Xr5ftsZrViEBNF1sZRajYNzkq2B/5hZXPniOCi4+LCKp8jeh39bWq5eRxyXCBsrRcbWtU2v0NIbsqZsUbxGizsqfkDUppji7YJ7pY+TOLEQpX6JF89NLX4G4709DktuRp9asZynrkO
m1sz/7lUXHbmwNl2rahN1pqwk3OzX/MGNv/taJlLW1OMNUD0ZPy/hrfC/tfHYz2EhaEpS4ui6TVpKknpPxMvPXUgMam6DvXt2n1SrelX8mvdNv1LWLx0R3tMNBOC5kBzarjsb4NMax1TUVilIO9ZDeLZNTHJJnOlG3KOKx1Dc1OcFTrA662g/Lqkv3opD5H48fJOTbGvqZmhNhtE4NaaGN7dru4KGHPrZeKQXsrxd5I0WjDYqqsYz1zVogrYebdHfH2LW0cft9ISQt36CVp3rpOpJAqQBJvweM98IwCfZ1Hti+/yG6vuklt9t6HLKh8m7+vD0a0q223wjM00A/PD2mGcJnPHeQ0prvyi9HkGpNkOmO0RsPesmq0R5sRTygfva4ht3+RKeilqk8ph6YO0fQqa1k+Mr/X5haY+2evJDm3t9zSgVXSchrSqobTnk7uVB+1rxvl1nPLMZHv+SHN8Pu4nl4/TkWxEdKqN6QsWF/K1kYCvpAN7g1FqUq15wY1Lf17vegQfHRHe0WexysWoKfghXkpk0Af6OVez0J7iT9SH5NkpJh6VZXfP63V6psPb2/n0f1+eKAL/jukqesDBZg4tJmkyV6RKm219ca9XKUhckiPzW5J+xpWC0O1cO+3jEQG++P929p5HxBJFipxgdYC4MsxnUq/vrNLXafH144Kxq0yTId8rG9DoZW3ozrWwf09nV8Ia6VfmJwN3sm4yi5ALXm6j8BIHOtQt0rpjqKK/crUEmRnq3WX+tHlvh7YPBbvjWpb+vW4vs53O9lbUPJ0kjd5jkkyndFCuauWBNgppT00dYiASMob3LLyPngpejeuhloyjxN2l70quLu7OT/RLeu5wdeSfC/foM0JOQ01crJK5EqQPz+myRh8fSyyjLdf81xf74Mht9SQ2MR720sDUS08EGOu9q7MzC1wevwLw5obYpugxWJxubPBXoPxEv5+6q1MzLirg2rPRcYQfDUR9ECv+po6JwnSQO/Uvk2qVbqtj53qtrKLAc8NudY3e+bd0iaZsyeZ+rTzm06SqNEAMjWnchNETxQJ6uWghTdNo3l9pPTRw/WqhqJBteKBEgHcnulQRJD+LwyUEhHs51ZiVm5a2sW94HH5txCR8fRrWvlEXS49GlZV7LE9odUKcgKqXr2Qe2FYcyx5ujeut9M4uyybbuY6ey/hUrbD79WMDJYlgSxFp/hoxIRp6zV0l4uKQynOp12RIRJ9UWKLcVkVdwS9fEMLHJo6VNHnVMJ9PeJxc3tltuhbLBY80qf8sKfH+zaSfH9PiypIeUyS6VBctLKVRmEyrepZ3UiS5RUWyfKc5D1fGfZafTe2C+Y/1gOtakfKEJExbXphgNv3WfSk2KmParFYLBjfr7Gw578s00KBHJrX5MQ5NUWHan+7uD39m1We+iWX54dwWjS5JzjAF81qRGBEu9rYNMnxZ11Kpjb6dKlh+6nLTr8/oLl7F8uhAb6YOLQZHvVgGrNc5/lyefNm6QuxdI3SW4y/va8zfnywq6LPoYZXbmqJDzxsgyPFs4PKt8nx8bGUS0QHOKkUrdjGJkIHLWvMgkkyHSrb70EJIpqzF2mpdEMGWhjF/FBv9xpFlnhxeHO08PLCPC46BB3rVUH7ulVw7I2hXj+ekdzXIx5Na4RXGsJhk/AaqCLjBXx2fqFsj2U0IXZ6TJA5XNdEuWSTXrE6m4DKfXakqhEZ5PB8REqfU5Fa1tLmQt+393XGz492x6PXNcTEoe5XoGnhHNVMvrlP2es2pTSoFoaejTix3hV7PcQyrlw7x3anV7PcE7vJc0yS6ZA7U4ZESr/ivBeFHOyN99W7w0mZsjxO3aqhHt3v3h7xWPSUfBVLfr4+GOSinLhLvGcn33o05cYWdivCUrLUrV6qG638YA296qWxk0I9DMMwCiNOJjYCUU2Z6Rp7PWClipUw/VGLpmm0wqlfs+qaTeBRZf2bxeLD29u6PtANn96lwWmTAlXRWGP7krYzZf13S
FMMa11DQDTkCeNlGExE6wmiAhV6kpVcPEoZ0+1pZZXakjJyRYcgu3H9GuHR6xxPc/n50e4qRiOWxWKxeyEupZLMTMIE9mzz09h765/j2ZeMnPN0UURtgXZW3KX455nrcH/P+qWTzEhfhrWuid6NY/Ds9U1EhyJZ7ahgt5J7A5qzClVO8x/rIToEWf1fe++nuJaYfns73NCGg9xKPNynAd69tY3QGHpfbdZfP6b4s/i+HvGl3/PzKf7ce7xvI3w22nGj/vdHyZtIJe9o60qAJIkJC8RNbWtiXD/pjQHdcTlHngqwWhKnIanFWZLGiOpU0c7FhL+vDyYObYYTbw4THYpmqb2lqW2dKFWfz11BHl5MG1E9nSRASBx707S0qG/T6h4lumyw4eUbmmM1J1sK402T/VpRwfjhga54vF8jjOooX7JAS9rUjnL6/fZ1nX+fyutYT/lBZVr1i4uF45EKNaGXatJQsX0qK15fvjCsOTrWE7sj5UJ6cYHDrHuKt9be0aVuaasZqbsBbnHw3qi1QRtmwasQHfL39cHHd3ZAk9hwRR6/UbUwWR7n7qsjwEl9vRvH2B1L7A4lGrv6+FhKx0fPe7gbFj/VG3+N7yX78+iRlGadco6AHtXJmBcqRGakZG+n0ED5EvhB/r6Sq7rDK3wGWSwWzVfQG5kcvXJ8fSxoWP3aOaaUXQCiuNu301X1c8taznuzsprc+G7vFOfymDf+rxU6x0dj64sDse/VwZW+HxEsfuiC6CSdjwbbIpRc20SX2faZK9NQuq0vDpTlccg9PNugSuQ61x7bUx/bGwGgqUIJR1EaVgvTbEPcb+/rgqkjWqJrg6poXjMCreuI76sREeSPHg2rCo0hRkI/BUcXiJ5UZlgsFt30NyQCKidNSB11qqjbv/CZgU3QsV6V0mSKxcJmxlrg6VbZitpcnXo9dWQrXN9Cu/153J3q6utjwTu3eL7lKyuPw3SM7m0JWwJHdy0uMKgWHlhusbpLfDSeHtgYy565TrH4pBLRJ7VpDW1fpxUWVU5yS/3srGln59VX91wb9qDV6zmjY5KMFKO1BsjBTibW2ZtMQsro3rAqxnSPFx1GOQF+PpjzUDehMXRvWBW/Pd4DDWLc31r3oYKjrUXx4UlBOb+PU68vmVb/7a9v4XwACOlb/2bFPZ3a143C/Md64JM722NMt3o4+vpQTtjUAFfVzq+NaCnpcXo0isGhqUMwpls9zZ0nlnVnl7pu32d4m5oKREJm1qh6GOJjQvDzo93x9MAmuh2C4Y260SFYbGfglZaUJPHKvqcF+Ep7f3tpeItK04MH8nxHOGYGyBDKjtp1JCTAfhXCfT3inSbQ1CZHxX2eTCW+ahrbM150CEJZLBZ0qFsF7etWufq19PvKtcKvJT4+Frzxf61Eh6EZ7eKiVHmeR/o00OyWtqcGNhYdAimoZOtdyaJVXHQIpo5spblBGmbl6iPpHjcWv7Se9Gym8aoV0q9+Td1rhfLLI93x22PaGt4T4OsDtfPbWtxiWdbrI1vh27Gdy1XZPTWgCYa0jEW4i63Yw9vUxM+PmGeAmV7wzIN0ze9qlt6bpvwN7Yzp1Ts5JoCpXfo/5UZpq9BGN/mGFpj7cDf4arSah4ytvgeVjGrhAANju61THKaOLO7HQ9qj1eS5EqwerlY6q4wb3prTCN01rLV2t+N66tuxXRx+z96vT5XQgHJ9rrTAx8eialLnnu7a73EdHuSPfk3LT7htXScSM8d0crjQ42rx84sxHbHgcWNNedUT83zikSH5+/pg95RBeLRvA9GhaIoc+9c5iUl+Tw9wXQkTGeKPbg3U6Y+WmavtHig5efqriCQiz4QF+ml+C56Z3dsjXnQIqmlVy7NeqUH+vpjzUFe73+suuO8pad9sJwk0s9ry4gA82Lv8NV6dKt4XAog258GumDG6g9NjBresUbq7hNTHJBnpXmSwPywuNwI4Fuin7bJ/d/VrWg03teWKpRY9fX0T0SGUE19V3
Ybc7lp+MEl0CKbx1s2tRYdARBoWFx1imm2Ir0rsr2ZPj4YxMkZifJ3qMQkAABsn9UcfL6fSq0mtLdP+PpVTFW/+X2u8epO+d5/0aBQjy64fUg6TZGR6wwzWaPXbsV0QFy1f8uOZgdpK7OhNyVhoTz3Rv5HLY9QaPOEvsQmpXAqtMjToI0lCr07RCuEESd25vVOc6BBIY/5+spfoEHTN262lzWtGyBSJ8f3vQfuVd2bSLi4KNSP1lTBpWSsCrWuLmU7foFqYqSpbSQwmycjUGlUPKzdiWQsu5+SLDqGch/rUFx2CrtlbBXPHs4OaOvxeVIg/PritrWqr++uf76/K85B970gYH++p61vEYuqIlhikwESlj+5oJ/tj0jXeDrjg1npjiasSjJYebhf0lFqDRfTiu7Gd0YPbKyXR+hAHJdxVYXLqF2M6CorEcxaLBUNaKd8zzoy/H6QNTJIZ2Ph+ritQjKKkBVfJCHk9K/KieobbLLXpyf6N8OKw5rI/bmiAH27uUMfjHnTujrivbsLR41pym4IVQ0H+vhjTPV6RE9IR7Wp7dD9Rrana1BGzOu4pb6c/vnxDC5kiIS1Y/Vw/xR67U7z9rXHzHumm2HOK4G1fvOoRQejdWD9b50hdb1ZobxCr03MrT7fK3tlF2rnM4deHIDiASTISg0kyA3tqoOsm4Ubh7+uDXx7tjukeVixoqU/wvW6MUa/oDokfPKSMke2Kk5SdK1xITBjUFA/10d5wiQd7N2AFAGlWeNC1UepNYsMERmJsISpfhLQStEVHDgWFVtEhOBVfNUTRwQeThjbHn+N7YssLA8rdbrTernJP8pSzJcK7ClYUa82G45dEh6CYMd20P7HRla4NquLYG0MR4ObrpW9TaQUNRntfIX1hkowU9eODXVUbE9w5PhoRZS6q7HmgV/mtg23rRAEAxvdrjDYaOXFv7UUVQ+f4aBkjIXeN798I/9e+Nt4f1a70trE94xV7vhqR3q8+zrqnkwyRGIca2wfIfWr13dObprHeb7VuEKNuArJTfDQe79tQ1eeUS0ZugegQnFKyigwo7l3Ypk4Uq4olKJur/ErGz9lRJupBmJaj7debN6aObIWoEH/d74Dx8/XBeAm9c4n0hmedpKiejWLQpb52EjcV+49NvrF4m0mvxjH48wl9N7qtUyVY9tVPck+j6uH48PZ28Lva4H5g81g8I8NEy9dHtrI7yUeOoQrVwgO9fgwjmfZ/5pvyeEuHOoo/xwvDmnl1//oqJ3LMREQCUu3qNbmEBmirh6lIJf0La8qwWGNEDaoVv2dNv72drqYWknpWPtsXn43uIDoMrz05wDw7l8g8eEVNphIaeO3EvFO9KogMdl55RlTRR3e0w11dnffyKklWtqgV4XG/sLLu7lbP7iSfmPAArx+b6JaOnvUMc8fDfTyvHGpfNwr3K1iRqWcXs/JEh+CRWztWrobRwyzbOtH6mkCnpBmjO2DLCwPw95O9RYdS6nMNJRz6N6uOnx/pzl6xKtFjsik6NICN6Yk0ikkyMpXbO11Lbnw7trPASEhPXr6hBWaP7YwlT/fGiHa18Z+rEyfbxkXhfw9UHl9eLTwQcx7siseu8zwx0MFBQ9T/Drk27VJrk1mJnOnewLNpbwse74n2dT1rEEzSLFS5krpGZBDuq5D456KVvvj7+qB6RBCiQ7WzWDO0dU3RIZTy9bGgS/1o+Gip6a2BDdPQz56cs4AVqKR9TJKRqUSG+GPWPZ1wfYvYck2h9eiHB7qIDsFU+jatjmY1IsrdViXEH70ax2DDxP745M725b7Xo1GMV1N5pt/eDrPtJHIf73ut90OdKiEePz7p0x2d43Bvd302/P3fg5UTyhWxQb97ylZHe6NWlPoVUk1rlO+npod2Adc3jxUdApFh1KvKcxijqHgO7IzVZsPGSQMw7+HKU3FLtnETiab9MxIimV3fIlbTzcqdlV7Hlzmh6N24Gka0Yxm/FtSKCsaNMm+pqBUVLHkCk
LciglmRphdv3dIGj3hRoSiSr48FsRHOe+D1U+l33ijeGKnfHnq3dYpDy1oRrg/UEC1VKpEywoPEfB42qBaGxtXNtUgg50ADEsuTc+CudqrLBzaPRe/GMejoYDcFkVqYJCNNqC1gFVsp7o5CrsjelpPmNSNwctqwSpOr3r7FPKPASTlLnuojOgSiStrGRYkOQfP03BDc18diuqQAaV/zGmISt2GBfi6rbUN1OvDCnjZ1ItHYxXTeYPbrMqSG1Zy/7397X2fMtVNlRqQmlg+QJuh962NZSq1CljSA79mwaumJRdmqM04plFdcdDDOpF6x+72SaXBd63vWY8lbX47piIMXMmV7PBFbrYic+fD2tujeIAYHL2SIDoVU0KV+NHuSEZnIBAmTvx+5rgGmLz+qQjQkh+jQAKRm5zs95u5udV0mR/10sPWejI9JMiKd+d+DXe1OTHzz//S77UaLooIDcAb2k2RhgX5Y93w/1IwUk1wa1LIGBrWsIeS5jY6TppSVlCFtGmPtqBDUiAxCDSfNfc2+MBAhaFuYEt65pQ182eCcSBXpVwqEPv+j1zWU1E7iqQGNmSTTkV8e7Y4B769xekyrWpEqRUPkHaZqicqoqqEpTWWVvRismCB7f1Rb3NOtHprX1FdvF62rFVV8cV4jMgi9GsVU+n6dKiG8qFOYt1uX3TXnwa5eDVsg1+R8zbjTKNiI7uhS1/VBEgX583SQyCwaVg8VHYIk9haESbtcbaMk0hOeFRHpwJQbWzj83i0d6+C1ka1UjMYcXr6hBV69qSU2TRpQaQobqSNG5Uqh7PwiVZ9PK/x81DsVkHM7enxVfVzo6UFIgB8e6FVfdBhEwvVqXHlRjIikqxUZhC/HdPTovoFcsCGNME6tPpGBcXVGfXWqhODeHvGiwzC1mk622pF82teNUu25okMDkJYjdqsP2RdhoN6gRJ4ID/LDuH6NRIdBpFsLn+iF8CA/j1tXsHqQtEKxdG1qaipGjx6NiIgIREVF4YEHHkBWVpbT45944gk0bdoUwcHBqFu3Lp588kmkp6crFSKpzEj9U8zAx8QfVDPu6iA6BBJAqaEbWjakZQ34s0kuCeKrYhUjkSs1I4M03UYhKkSbLUGISrSqHYl6rPImA1Ds7GT06NHYv38/li1bhoULF2Lt2rV4+OGHHR5//vx5nD9/Hu+99x727duH2bNnY8mSJXjggQeUCpFU9O3YzpjzkPbH+d4nQ+XQ6G71vA+kjLhoMc3hH+/bEM1qhKveF0q0AD8fDG9TU3QYpLIG1ULRJT5adBhEpvLIdQ0wpGUsakaxapTIlXdHtZHlcbo3EDOZWy61o4IRE8aEoRH8Ma6n6BCI7FJk2fzgwYNYsmQJtm7dik6dOgEAPvnkEwwbNgzvvfceatWqVek+rVq1wvz580u/btiwId544w3cfffdKCwshJ+f+Vb4jaSfhCk2WtCvWXW8v+yIV4/xzMDGMkVTbObdnu3r99aA5rEY0DxWyHOT+v4a3ws3frpOdBjC9G9aHT4ariAgqig7r1B0CF5rEhuOmWM6iQ6DSPMigvzQo6E8/dIe69sI646lYNOJVFkeT20xYQHIK7QiJStfdCjkpkbVy7ePaRsXJSYQIhcUKRHZuHEjoqKiShNkADBw4ED4+Phg8+bNkh8nPT0dERERThNkeXl5yMjIKPeHSCS599MH+pmrkku0WzrUER2CEK6mXVULU7eJPpkPKwPcU69qiOgQiEhGkcHq9AX09bGgVpSYXQpkXh/f2R6dWLFPOqHI1XdiYiKqVy9fOeTn54fo6GgkJiZKeoyUlBRMnTrV6RZNAJg2bRoiIyNL/8TFxXkcN1FZvj6Wcics8QIuSN4f1ZZN+1W09cWBeG1ES9FhaNKCcT1Eh0AG9tU9nfD9/V1Fh6ErnjZGJiJtCvL3xdKn+4gOQ1O61GdSxSjM2PeV9MutJNnEiRNhsVic/jl06JDXQWVkZGD48OFo0aIFXnnlFafHTpo0C
enp6aV/zpw54/XzEwHFCap/n+9X+vXKZ/uqHsMtHetw0ouKqoUHsom5A3WqGKNqpWcj+71YbmpXuQ0AqWdgi1i0qBUhOgwA+pgm3KFuFEa0qy06DCJDeaRPQ9EhOJSRK+/26svZ+tiqOPehbuhaIVEWwB0WRKQwt1K6zz77LO677z6nxzRo0AA1atRAcnJyudsLCwuRmpqKGjVqOL1/ZmYmhgwZgvDwcCxYsAD+/s5LjwMDAxEYyG1AJD8/Xwsigq79/qnZr+jmDrVNPV1SaXL19SD9effWtjicmImxs7cCALa8MABVwwI1PdGsohoRQWhfNwo7T6eJDsWQXhzeHAt2nhMdhlPPDmqq2tYsIjNoXjMCt3TUbruFBtXknRhotcn6cIrx8bGg4unwc4ObYfIf+8QERLK7t3s9fLfxlOgwiMpxK0lWrVo1VKtWzeVx3bt3R1paGrZv346OHYubjq9cuRJWqxVduzreTpGRkYHBgwcjMDAQf/75J4KCOO2IzOmD29qJDsGw5j/WvVLjUDK+sEA/ZOUVIsDPB/2aXWsHUD1Cf58zPj4WTLi+CcZ8vUV0KKqrFRmE8+m5ij5HTFgggv19caWgSNHnIXLmuibVsObIRcnHP3pdAwWjIdEGt3ReZGAmFbdgXtfE9bUpqcudfsqvjmiFRtXD8PIf++HLAgHSCEXqVZs3b44hQ4bgoYcewpYtW7B+/XqMHz8ed9xxR+lky3PnzqFZs2bYsqX4JD8jIwODBg1CdnY2vv76a2RkZCAxMRGJiYkoKuKJKhERee7xfg3x7djOiCkzgEDulXk1Zcq09aazzvq9/PY4x8WTOdzXI96t4ycOba5MICaRmVsgOgSHBjavjv8Maio6DNlc30Leifcv39BC1scj70WFBGD+Y9J72d7YthbG9oxH36ZMeJI2KLap+8cff0SzZs0wYMAADBs2DL169cKXX35Z+v2CggIcPnwYOTk5AIAdO3Zg8+bN2Lt3Lxo1aoSaNWuW/mGfMSIi8saQljXQr+m1E/MfH+yKr+/tLDAi7+QVyrN4NNbNC3HRakTqr/JPbx5hRZJTtVT6HawWHoghrB5STbCGB2G8c2tbXbUEcOanh7qhYz15F2dYfKRNHetVwdE3hkr63Y0KCcCUG1siNJDN/UkbFEuSRUdHY86cOcjMzER6ejq++eYbhIVd2+IUHx8Pm82Gvn37AgD69u0Lm81m9098fLxSYZIdvRszi09kNoF+voiLNuZI+GGta6BBhWbsPRvFoH6MfivJ5KJmr0U9MfNWy24N7A+3oGJfjOmk+HMMbVUDrWpH4oPb2yr+XFTsw9vbiQ7BruY1IxAdGiA6DKGqhVdOTJv5PVpP/H198NSAxqLDIHIbx4NQOf6+FtzZJU6xxzfKShjpV5CGV4tF8vWx4I9xvUSHQRL5+Xj/8c0kIZH7WteJVPw56lYtniYcEsCqCiWFB/rhyf6NsHzCdWhVW/mfK3mmRkTlAW1RHFyiG//Xvjb6Na2GDnFVRIdCJBmTZFROeJA/LArWLb82oqVijy23xtXDRYdACgjlRYdDbJiqH/2bVfd6e9CchxwP0tGy8CBzvob7lenVItd2WyKjcetjzAJMGNRUU8N8YiMCERXij+GtawIAIoPN+X5XVpPYyufj4UFMkulFXHQIvh3bBZEh/JmRfjBJRuRA0xpMkpG5RIb448a2tUSHQRKEBvrhv0M8b+Q8aWgz1IzU3vbaWlHFMYUEOE4AlgwtKLLZVIlJi+Qa3EBkNGuf6yc6BK9EhQRg1+RB+PSu9nigV31MvkE/i8tKuaVDHax49joAQOjVz4ayU6qJiOTGJBkREZUa3pqNos1gZPvaokOw6/bOcfj2vs5oWSvC5bE2BZNk7eKiFHtsT6jdk2hUxzqqPh+V14e9YT0WFx0iOgRZWCwWvHxDC7SQ8F5odD4+FjSo0B7g/p7xYoIhIlNgkoyIiIiEeH9UW3w5pmPp10H+vujXrLqi2/6l+
Gx0B6HPX9GSp3or/hxRV7fCBPr5lCblnh/STPHnpfL6N6uOno1iSr/uHM8+PkQVif6MICJj40Z3UtXl7HzRIRCRm2bdo/w0NzKnWzRasVSy7VMrqkdUnu4mt1Gd6sDP14Ku9ati7ZEUAED7ulGKPy+VV3G+0Xf3d0GLyUud3qcDf05kYnUNUj1IRNrBSjJSlVHK4InM5PoWsaJD8Er/ZvqOnwhQvnIiJMAPo7vW4xRqjZEy4XL2/V1UiEQ/5j7cTXQIpJDs/MpDS/x9eTlLRPLiuwqp6oY2xm4K7u/LiwsiLYkODcCtGq1WInLHQ70biA6BSBe6NagqOgSSmb1FgtiIQK+nPBMR2cMkGanK6CvUDatpZ4w4EQFv/l9r0SEQyaJ7w6qYOoKT7ohIvwL85Lv0/PXRHvj7yV6yPR4RUQn2JDMwPx8LqoYG4JLG+4A9OaCx6BBKVQsPFB0CEcloSCtO6zSqmDDzvV8Pa10Te86mo2/T6oo+z80damPLyVQ0jQ1X9HmIyDym3NgC7b2YHNytQTSGta5Z+jVbuBCRUlhJZmAWiwXv3dZWdBguTbi+iegQSsWq0ByZSMt8ODGKNK721ab6QSbcZlM1LBDvjmqLyGB/RZ+nSWw4fnu8B6pcnXJJpDejJGyzb1Sd1f9qurNLXfh4saNk7sPdcU/3ePkCIiJygEkyIiIq1atxTLmvN78wQFAknjs5bRgaVw/DnV3iRIdCCvjhgS745dHuosMgUkRBkU10CIbw7ijXi8TvSziGiIjMh0kyIiIqFRLgh/t6xJd+HRWibMWK3CKD/WGxWLDoqd7sR2ZQDaqFoXN8tOgwiBQx5cYWlW77+REmheUWFeKPBuwjS0REdjBJZnRckFTMTw9xxDgZU8mWy071qiBAZ6PV/32+H4DikfD2pmFRsSIrPxyItCa+aojdxE2X+tFY9kwfARERERGZj76ufshtWm/arzdlWyk0qBYqLhAiBd3drS6ubxGL7x/oortEk96Set7oWr+qx/dliozIc53qVVH9OauzZyoREZEqzHM1YVJ1qgSLDkH3yo6rZvWY/lUJYSNqVxpUC8OsezohJEDbA5DrVQ3BR3e0Ex2GMC1qRWBwy1jRYRCZznf3d1H9OYP8ecpORESkBm1fAZHXutaPRo+GVbHh+CXRoejWL490R/qVAtFhkAx2Tb4ekTrrsUXO3dCmFuKrhmLFoWR8vOIofL2YnEVEJEVooPqnz4F+9qe5PtCrPsIFxKN3aTk8ryMiIvv4qWpwFosFtaNYTeaNtnFRokMgmeht6yC55utjQdu4KLSuHYkx3erB30TbLYmIHu7TgJ9tDjSvGY6DFzJRPTwQyZl5osMhIiKd4NWECWTlFYoOgYjIa86qxHx8LKgWHqhiNEREpGWLnuyN5wY3xYzRHUSHYnq9G8eYqmcoEekbK8lM4EpBkegQiIi89se4nrDabLjp0/UAoPmeaaRvTWPDcTgpU3QYROQhi8WCcf0aiQ6DAPzwQFfRIRARScaUvglorQi/W4No0SEQkQ61rBWBNnWisPCJXujTOAavjWgpOiTh4qqEuDxmVMc6lW6rGsoBFq58dW8n0SEQuRQSYL9XGREREXmGSTJSXfOaEaJDICIda1U7Et8/0BWd45lwf2JAY3x6V3unxzw5oHG5r98f1RZB/rywdiUu2nUCkkhN8x4uP2H7t8d7IDyIw2iIiIjkxCQZERGRTkUG++OGNrVEh0FEMqjiosKz4hbzUG45l2zjpP6iQyAiIp1gkoxU16dxNdEhEBGZylMVqsmISHumjmglOgTDqhlZftJ7g2qhgiIxn3+e6SM6BCIitzBJRqrr16y66BAU07FeFdEhEBFV8uh1DUWHQEQutKod6fT7l7LzVIrE+D7jxEvVNIkNFx0CEZFbWKdNVMFdXeqiekSgR/edOLSZzNEQEQCM79cIFovWxpAQEanHZhMdgTHEhAWiWQ32xyUiIvtYSUZUwZs3t8bTA5u4P
C7CTrPcsEDmnb1RKzJIdAikUeP6NRIdgi7Meahr6d/jq4agWY1wxIR5lvQnbYgI4ucKFYsI5u8CERGR0pgkI/JQgF/llw8rXbyz7nk21iXyRo+GMaV/7xwfjSVP90FwACdZ6tm8R7qLDoE0okPdKpjzYFcMbVVDdChERESGxSQZEWmGjw+TjESe8Pfla8eIQgJ80bwmt4VRMYvFgh6NYhAZXLmSnUiL5j3cTXQIRERuY5KMiHSjW4No0SEQadKUG1tycIgCnpGw9Z5IDuFs10AG1LVBVdEhEBG5jUkyIi+0vjqJirsslfPRHe1K//7F3Z2w8Ile4oIhYfgac+7ubvUw/7EeAIDlE/oIjsY4nhrYWHQIZAKvjWiJr+/rLPn4QDvtHki6e7rXEx2CcFl5haJDICLSLC5bUTks4XdPye7AGXd1QJGVY6eUMKJdbcSEBaKgyIrIEH9EhkQiPMgPmbk8wTO6OlWCAQD/GdQEQf7sq0VExnRP93i3jn+wdwPkFVpRPyZUmYAMKq5KMG7rFIfx/TkIJjO3QHQIRESaxSQZlfPWza1Fh6BpDaqFlf69bPusRtXD0CQ2XEBE5tCzUYzrg8hwbu8ch4bVwtCjIbdrEJExTR3R0u37xEWH4K1b2igQjbGt/W8/Dli6yt9X+WrExtXDXB9ERKRBrNemctg7wLlq4YFIeGs4Et4azhMtHWlWgwlMPQr080XPRjF8rbmpWlgQIoL8MLxNTdGhEJELY9ysIiPP8bPkGt+rK70ta5UfDNIlXr7er9+4sYWYiEhLmCQjIt0Z3LIGAOmrlFVCApQMh0hTIkP8seeVwejbtLroUIiISIOeub4J2teNwq0d65S73U+mScnxVUMQFx0iy2MREamNSTIi0p2pI1ph9X/6ok+TapKOrxLKXntEREREANCvaXUseLwnfFhdR0RUCZNkRKQ7wQG+iHejYfHzQ5rhif6NEBHENoxEROS9JU/3Fh0CERERKYBJMiIynIoLo/WqhuLZQU3Zj4RMy1+mLTRy+vmR7qJDIPKYo8mS7eKiJD/GTw91kykaIiIikguTZESkW1XDXPcau7lDbRUiIdI2P18fvHqT+1P0lNSUAzXIgG7vHIeT04ZJOrY7J/eSwbzmwbRWIiKtYZKMiDQrLND59sgx3eqhQbXi1fxgf1+7x3xwWzu5wyLSJR8NFJM1cGObNAEj2tUSHQJ5wGKxYPZYTvYjc3mkTwPc1ilOdBhERF5jkoyINMvV+PDwIH+8dlMrAMCX93RUIyQi3WpaIwIA0FDiVFglvDailbDn1pt2cVF49Sb+e+lNZHDxoJi+TaujTZ3Ict8bcnUyM5FWedOV4okBjRHox0tLItI/vpMReWH32XTRIRhal/rRLo/p1TgGe18ZhJa1Il0eS2RmXepHY/+rg93qmSS3WlHB6GGQLWYv39BC0cevERGEAF5w6k7/ZtVL//7HuJ6lf29dOxJx0cEiQiJy6a2bW6NhtVA82KuBLI83rHVNWR6HiEgEnn0Rke6FB/mLDoFIF0JdbGEm6R7oVR+1ozxLevhpYe8rye6HB7ogqMzWfw6LIb0I8vfFimf7ol+ZJK9UneOrlP7dYrFgx8vX4z+DmsoZHhGRqpgkIyJDKNuTjJclRKRls+7tJDoEUkCQg96YRGYSHRoAHy4EEJGOMUlGRIYQHOCLY28MRY2IIDw9sAmqhgYgNiJQdFhEVEH81eb9ermGeml480q3LX6qt4BISOtEbmUmkkvZyjBHYsJ4fkVExsUkGRFpUsd6rk/SKvLz9cH6if3x5IDG+H1cT/z2eE/XdyIiVT03qCk+vau9brZJP9i7co+e5jUjBERCWjamWz34+1Y+rf7t8R4AgKgQffy+k7nUjAwCAFQLv5b0+uXRHi7vV3aR48a2xVN4A+z8/hMR6RGbkxDJwGYTHYHxfHJne4/u53v1zC0uOkTOcIhIJlVCA3BDm1qiwyDyip+PD2pEBCExI9fpce3jovBIn
wa4pWMd7DqTpk5wRBINbB6L38f1RNs6ng8/uqd7PG7rFMdBI0RkGHw3IyFKVp2MIjU7X3QIhtKtQTRqedgQm4jICOpVZaJfy3x9LNj0wgCXx1ksFkwa1hxNYsNxS4c6aF83SvngiCTy8bGgXVyU10Mm2I+PiIyElWQkRLTBth0E+TPfLJclT/dGNfa6ICKT+ve//XDwQga6N6wqOhSSma+PBY2qhWHn6TR8f38X0eEQeSwsyA/JmXmiwyAiUgSv7EmIR/s2FB2CrNrWiRIdgmE0qxGBqkySEZGB3NklDuFBxeuS8VVD0LyG455mAX4+GNSyhm56tlGx5Ezn2y5LtKhV/LNvHBumZDhEinpjZGvRIRARKYaVZCREzUhjbaXjqGsiIvOpFh6Ic2lXKt0+4fomuL5FLPadS0fLWpFoUSsCB85nYPfZdCx+qg+CA3wxdWQrvPz7vkr3DQvkqZke9WtaXdJxY7rVQ89GMYY7DyJzaFsnEt/c15mLmURkaKwkIyIiIvLAtJsdV1M0rxmBUZ3iSiuHKhrTrZ7d2wPZ/Fp3Av18cEeXupKO9fP1QZPYcIUjIlLGA70blCbIPritLaaOaCk4IiIi+fFMjIiIiEiisheFzWtGSG7EXpIsc9Uf29sG2qS+u7pKS5AR6V1wmQb9N3eogzHd48UFQ0SkENb0ExEREUnk6UXhxKHNMax1TadT4La8MAC+3L6vK9tfGoiokADRYRAp6uBrQ/DztjO4rkk10aEQESmOlWRUqkv9aNEh6M5dXesiMpjNlYmIyLnIYH/0blz+ArN2VPm+VNUjgtQMiWRQNSyQiU0ynIoFrcEBvri3RzwCuB2ciEyA73RUataYTqJD0J3XR7TChon9RYdBREQaUjH55cjK/1xX+vc/x/dUKhwiIrf8Nb6X6BCIiIThdksqFRnCiih3+fhYEMpJZEREdNWojnUwol0tSccG+vmibVwkfCwWtKkTpWxgREQStaodKToEIiJheHVPREREJJNejWPg5yu9UH/ew90VjIaIiIiI3KHYdsvU1FSMHj0aERERiIqKwgMPPICsrCxJ97XZbBg6dCgsFgt+//13pUKkMqqwioyIiMgrD/Sqj0Etarh1nyB/X6fN/ImIRBreuqboEIiIVKVYJdno0aNx4cIFLFu2DAUFBRg7diwefvhhzJkzx+V9p0+fzhHoKvuTvQeIiIg89u19ndGvWXXRYRARyWrG6A6iQyAiUpUilWQHDx7EkiVL8NVXX6Fr167o1asXPvnkE8ydOxfnz593et9du3bh/fffxzfffKNEaGTH+on9ERcdIjoMIiIi3SmZDN2jUVXBkRARERGRtxRJkm3cuBFRUVHo1OnatMSBAwfCx8cHmzdvdni/nJwc3HXXXZgxYwZq1HBvuwJ5TuoULiIiIipvwvVNsHxCHwT6ccskERERkd4pkiRLTExE9erltxz4+fkhOjoaiYmJDu/3zDPPoEePHhgxYoTk58rLy0NGRka5P6QP4ZwKSUREOjB1ZCsAQJPYsErfC/TzRaPq4WqHRESkqFEd66B7A1bIEpH5uJWlmDhxIt5++22nxxw8eNCjQP7880+sXLkSO3fudOt+06ZNw6uvvurRc5pFdl6R6BDs+unhbkjLKRAdBhERkVNjutXDXV3qotBqFR0KEZEq3rm1DWw20VEQEanPrSTZs88+i/vuu8/pMQ0aNECNGjWQnJxc7vbCwkKkpqY63Ea5cuVKHD9+HFFRUeVuv+WWW9C7d2+sXr3a7v0mTZqECRMmlH6dkZGBuLg4l/8vZpKRq81EVKvakaJDICIiksTXxwJfH26pJCJzsFgs4Bw1IjIjt5Jk1apVQ7Vq1Vwe1717d6SlpWH79u3o2LEjgOIkmNVqRdeuXe3eZ+LEiXjwwQfL3da6dWt8+OGHuPHGGx0+V2BgIAIDA934vzCfAD/Hu2obVgtVMRIiIiIiIiIiIm1SpClU8+bNMWTIEDz00EOYOXMmCgoKM
H78eNxxxx2oVasWAODcuXMYMGAAvv/+e3Tp0gU1atSwW2VWt25d1K9fX4kwCcCX93RyfRARERERERERkcEp0rgfAH788Uc0a9YMAwYMwLBhw9CrVy98+eWXpd8vKCjA4cOHkZOTo1QIdFXLWhGiQyAiIiIiIiIi0jSLzWaslowZGRmIjIxEeno6IiKYHAKA9CsFaPvqP3a/d/j1IRxbT0RERERERESGJTVXpFglGWlHZLC/3dvf/L/WTJAREREREREREYFJMiIiIiIiIiIiIibJzKxqWIDoEIiIiIiIiIiINIFJMpOKCvHHwOaxosMgIiIiIiIiItIEJslMqk/javD1sYgOg4iIiIiIiIhIE5gkIyIiIiIiIiIi02OSjIiIiIiIiIiITI9JMiIiIiIiIiIiMj0myYiIiIiIiIiIyPSYJCMiIiIiIiIiItNjkoyIiIiIiIiIiEyPSTIiIiIiIiIiIjI9JsmIiIiIiIiIiMj0mCQjIiIiIiIiIiLTY5KMiIiIiIiIiIhMj0kyIiIiIiIiIiIyPSbJiIiIiIiIiIjI9JgkIyIiIiIiIiIi02OSjIiIiIiIiIiITI9JMiIiIiIiIiIiMj0myUyid+MY0SEQEREREREREWkWk2Qm8eWYTtg1+XrRYRARERERERERaZKf6ABIHcEBvggO8BUdBhERERERERGRJrGSjIiIiIiIiIiITI9JMiIiIiIiIiIiMj0myYiIiIiIiIiIyPSYJDMpH4voCIiIiIiIiIiItINJMpMa27O+6BCIiIiIiIiIiDSDSTITio0IRNu4KNFhEBERERERERFpBpNkJtSmTpToEIiIiIiIiIiINMVPdACkrtljO6NlrUjRYRARERERERERaQqTZCbTt2l10SEQEREREREREWkOt1sSEREREREREZHpMUlGRERERERERESmxyQZERERERERERGZHpNkRERERERERERkekySERERERERERGR6TFJRkREREREREREpsckGRERERERERERmR6TZEREREREREREZHpMkhERERERERERkekxSUZERERERERERKbHJBkREREREREREZkek2RERERERERERGR6TJIREREREREREZHpMUlGRERERERERESm5yc6ALnZbDYAQEZGhuBIiIiIiIiIiIhItJIcUUnOyBHDJckyMzMBAHFxcYIjISIiIiIiIiIircjMzERkZKTD71tsrtJoOmO1WnH+/HmEh4fDYrGIDkcWGRkZiIuLw5kzZxARESE6HCLV8TVAxNcBEV8DZHZ8DRDxdUCes9lsyMzMRK1ateDj47jzmOEqyXx8fFCnTh3RYSgiIiKCbwRkanwNEPF1QMTXAJkdXwNEfB2QZ5xVkJVg434iIiIiIiIiIjI9JsmIiIiIiIiIiMj0mCTTgcDAQEyZMgWBgYGiQyESgq8BIr4OiPgaILPja4CIrwNSnuEa9xMREREREREREbmLlWRERERERERERGR6TJIREREREREREZHpMUlGRERERERERESmxyQZERERERERERGZHpNkOjBjxgzEx8cjKCgIXbt2xZYtW0SHRKSKtWvX4sYbb0StWrVgsVjw+++/iw6JSFXTpk1D586dER4ejurVq2PkyJE4fPiw6LCIVPX555+jTZs2iIiIQEREBLp3747FixeLDotImLfeegsWiwVPP/206FCIVPHKK6/AYrGU+9OsWTPRYZFBMUmmcfPmzcOECRMwZcoU7NixA23btsXgwYORnJwsOjQixWVnZ6Nt27aYMWOG6FCIhFizZg3GjRuHTZs2YdmyZSgoKMCgQYOQnZ0tOjQi1dSpUwdvvfUWtm/fjm3btqF///4YMWIE9u/fLzo0ItVt3boVX3zxBdq0aSM6FCJVtWzZEhcuXCj9s27dOtEhkUFZbDabTXQQ5FjXrl3RuXNnfPrppwAAq9WKuLg4PPHEE5g4caLg6IjUY7FYsGDBAowcOVJ0KETCXLx4EdWrV8eaNWvQp
08f0eEQCRMdHY13330XDzzwgOhQiFSTlZWFDh064LPPPsPrr7+Odu3aYfr06aLDIlLcK6+8gt9//x27du0SHQqZACvJNCw/Px/bt2/HwIEDS2/z8fHBwIEDsXHjRoGRERGRCOnp6QCKEwREZlRUVIS5c+ciOzsb3bt3Fx0OkarGjRuH4cOHl7s2IDKLo0ePolatWmjQoAFGjx6N06dPiw6JDMpPdADkWEpKCoqKihAbG1vu9tjYWBw6dEhQVEREJILVasXTTz+Nnj17olWrVqLDIVLV3r170b17d+Tm5iIsLAwLFixAixYtRIdFpJq5c+dix44d2Lp1q+hQiFTXtWtXzJ49G02bNsWFCxfw6quvonfv3ti3bx/Cw8NFh0cGwyQZERGRDowbNw779u1jDw4ypaZNm2LXrl1IT0/Hr7/+invvvRdr1qxhooxM4cyZM3jqqaewbNkyBAUFiQ6HSHVDhw4t/XubNm3QtWtX1KtXDz///DO33ZPsmCTTsJiYGPj6+iIpKanc7UlJSahRo4agqIiISG3jx4/HwoULsXbtWtSpU0d0OESqCwgIQKNGjQAAHTt2xNatW/HRRx/hiy++EBwZkfK2b9+O5ORkdOjQofS2oqIirF27Fp9++iny8vLg6+srMEIidUVFRaFJkyY4duyY6FDIgNiTTMMCAgLQsWNHrFixovQ2q9WKFStWsA8HEZEJ2Gw2jB8/HgsWLMDKlStRv3590SERaYLVakVeXp7oMIhUMWDAAOzduxe7du0q/dOpUyeMHj0au3btYoKMTCcrKwvHjx9HzZo1RYdCBsRKMo2bMGEC7r33XnTq1AldunTB9OnTkZ2djbFjx4oOjUhxWVlZ5VaITp48iV27diE6Ohp169YVGBmROsaNG4c5c+bgjz/+QHh4OBITEwEAkZGRCA4OFhwdkTomTZqEoUOHom7dusjMzMScOXOwevVqLF26VHRoRKoIDw+v1IsyNDQUVatWZY9KMoX//Oc/uPHGG1GvXj2cP38eU6ZMga+vL+68807RoZEBMUmmcbfffjsuXryIyZMnIzExEe3atcOSJUsqNfMnMqJt27ahX79+pV9PmDABAHDvvfdi9uzZgqIiUs/nn38OAOjbt2+527/99lvcd9996gdEJEBycjLuueceXLhwAZGRkWjTpg2WLl2K66+/XnRoRESkgrNnz+LOO+/EpUuXUK1aNfTq1QubNm1CtWrVRIdGBmSx2Ww20UEQERERERERERGJxJ5kRERERERERERkekySERERERERERGR6TFJRkREREREREREpsckGRERERERERERmR6TZEREREREREREZHpMkhERERERERERkekxSUZERERERERERKbHJBkRERGRTt13330YOXKk6DCIiIiIDMFPdABEREREVJnFYnH6/SlTpuCjjz6CzWZTKSIiIiIiY2OSjIiIiEiDLly4UPr3efPmYfLkyTh8+HDpbWFhYQgLCxMRGhEREZEhcbslERERkQbVqFGj9E9kZCQsFku528LCwiptt+zbty+eeOIJPP3006hSpQpiY2Mxa9YsZGdnY+zYsQgPD0ejRo2wePHics+1b98+DB06FGFhYYiNjcWYMWOQkpKi8v8xERERkVhMkhEREREZyHfffYeYmBhs2bIFTzzxBB577DGMGjUKPXr0wI4dOzBo0CCMGTMGOTk5AIC0tDT0798f7du3x7Zt27BkyRIkJSXhtttuE/x/QkRERKQuJsmIiIiIDKRt27Z46aWX0LhxY0yaNAlBQUGIiYnBQw89hMaNG2Py5Mm4dOkS9uzZAwD49NNP0b59e7z55pto1qwZ2rdvj2+++QarVq3CkSNHBP/fEBEREamHPcmIiIiIDKRNmzalf/f19UXVqlXRunXr0ttiY2MBAMnJyQCA3bt3Y9WqVXb7mx0/fhxNmjRROGIiIiIibWCSjIiIiMhA/P39y31tsVjK3VYyNdNqtQIAsrKycOONN+Ltt9+u9
Fg1a9ZUMFIiIiIibWGSjIiIiMjEOnTogPnz5yM+Ph5+fjw1JCIiIvNiTzIiIiIiExs3bhxSU1Nx5513YuvWrTh+/DiWLl2KsWPHoqioSHR4RERERKphkoyIiIjIxGrVqoX1/9+eHRQBEMJAEMyhACPYQA8mkUVxMnhst4J9piZ71zmn5pw1xqi1VvXeqzWnIgCQ47v33tcjAAAAAOAl70EAAAAA4olkAAAAAMQTyQAAAACIJ5IBAAAAEE8kAwAAACCeSAYAAABAPJEMAAAAgHgiGQAAAADxRDIAAAAA4olkAAAAAMQTyQAAAACIJ5IBAAAAEO8HzDi1ivBSs0EAAAAASUVORK5CYII=\n" }, "metadata": {} } ], "source": [ "import librosa.display\n", "import matplotlib.pyplot as plt\n", "\n", "text = \"Text To Speech models have made great strides in quality over the last few years.\"\n", "\n", "# Generate raw waveform speech\n", "speech, rate = tts(text), 22050\n", "\n", "# Print waveplot\n", "plt.figure(figsize=(15, 5))\n", "plot = librosa.display.waveshow(speech[0], sr=speech[1])" ] }, { "cell_type": "markdown", "metadata": { "id": "ARFO5J46SJyj" }, "source": [ "The graph shows a plot of the audio. It clearly shows pauses between words and sentences as we would expect in spoken language. Now let's play the generated speech." ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 60 }, "id": "GpNhbItq3QGL", "outputId": "c85abf97-1b39-4938-f4e3-c4f415b3b132" }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} } ], "source": [ "from IPython.display import Audio, display\n", "\n", "import os\n", "\n", "import soundfile as sf\n", "\n", "def play(speech):\n", " # Convert to MP3 to save space\n", " sf.write(\"speech.wav\", speech[0], speech[1])\n", " !ffmpeg -i speech.wav -y -b:a 64 speech.mp3 2> /dev/null\n", "\n", " # Play speech\n", " display(Audio(filename=\"speech.mp3\"))\n", "\n", "play(speech)" ] }, { "cell_type": "markdown", "metadata": { "id": "bDxW-tsCELob" }, "source": [ "# Transcribe audio back to text\n", "\n", "Next we'll use [OpenAI Whisper](https://github.com/openai/whisper) to transcribe the generated audio back to text." 
] }, { "cell_type": "code", "execution_count": 48, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 70 }, "id": "-KgYwAQzFVll", "outputId": "47857ba8-0942-42f9-ee8a-fde0abe5140a" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'Text to speech models have made great strides in quality over the last few years.'" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 48 } ], "source": [ "from txtai.pipeline import Transcription\n", "\n", "# Transcribe files\n", "transcribe = Transcription(\"openai/whisper-base\")\n", "\n", "# Print result\n", "transcribe(speech, rate)" ] }, { "cell_type": "markdown", "metadata": { "id": "lmKDL32ySfXl" }, "source": [ "And as expected, the transcription matches the original text." ] }, { "cell_type": "markdown", "source": [ "# Streaming speech generation\n", "\n", "The TextToSpeech pipeline supports incrementally generating snippets of speech. This enables the pipeline to work with streaming LLM generation." ], "metadata": { "id": "oI2bCz0kBDSO" } }, { "cell_type": "code", "source": [ "text = \"This is streaming speech generation. It's designed to take output tokens from a streaming LLM. It returns snippets of speech.\".split()\n", "for speech, _ in tts(text, stream=True):\n", " print(speech.shape)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QGpF2T8ce0Md", "outputId": "4f8eefb3-eb46-4bdf-ce45-bd752d9a3822" }, "execution_count": 49, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "(32768,)\n", "(31488,)\n", "(26368,)\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "8miGM2xKSkq_" }, "source": [ "# Audio books\n", "\n", "The TextToSpeech pipeline is designed to work with large blocks of text. It could be used to build audio for entire chapters of books.\n", "\n", "In the next example, we'll read the beginning of the book `The Great Gatsby`. 
We'll load a new model that enables setting a speaker." ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 60 }, "id": "liLFTAAvOWpi", "outputId": "76210dec-7b43-4383-ebd0-6320c71d42d4" }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} } ], "source": [ "# Beginning of The Great Gatsby from Project Gutenberg\n", "# https://www.gutenberg.org/ebooks/64317\n", "\n", "text = \"\"\"\n", "In my younger and more vulnerable years my father gave me some advice\n", "that I've been turning over in my mind ever since.\n", "\n", "“Whenever you feel like criticizing anyone,” he told me, “just\n", "remember that all the people in this world haven't had the advantages\n", "that you've had.”\n", "\n", "He didn't say any more, but we've always been unusually communicative\n", "in a reserved way, and I understood that he meant a great deal more\n", "than that.\n", "\"\"\"\n", "\n", "tts = TextToSpeech(\"neuml/vctk-vits-onnx\")\n", "speech = tts(text, speaker=3)\n", "play(speech)" ] }, { "cell_type": "markdown", "metadata": { "id": "NsmhYciqTAX6" }, "source": [ "# Text To Speech Workflow\n", "\n", "In the last example, we'll cover building a text-to-speech workflow. This workflow is no different in that it connects multiple pipelines together, each of which is backed by machine learning models.\n", "\n", "The workflow extracts text from a webpage, summarizes it and then generates audio of the summary." 
] }, { "cell_type": "code", "execution_count": 51, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "TAv7XVgzHb_4", "outputId": "1d88f847-5773-4be3-8f6c-ed33e49ef548" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Overwriting workflow.yml\n" ] } ], "source": [ "%%writefile workflow.yml\n", "summary:\n", " path: sshleifer/distilbart-cnn-12-6\n", "\n", "textractor:\n", " join: true\n", " lines: false\n", " minlength: 100\n", " paragraphs: true\n", " sentences: false\n", "\n", "texttospeech:\n", " path: neuml/vctk-vits-onnx\n", "\n", "workflow:\n", " tts:\n", " tasks:\n", " - action: textractor\n", " task: url\n", " - action: summary\n", " - action: texttospeech\n", " args:\n", " speaker: 15" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 115 }, "id": "_dmQZ6i8IDAA", "outputId": "47bda0fc-c7df-42d3-d540-834576c806f8" }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} } ], "source": [ "from txtai.app import Application\n", "\n", "app = Application(\"workflow.yml\")\n", "\n", "speech = list(app.workflow(\"tts\", [\"https://en.wikipedia.org/wiki/Natural_language_processing\"]))[0]\n", "\n", "play(speech)" ] }, { "cell_type": "markdown", "metadata": { "id": "VCU8zGGDXQ0Y" }, "source": [ "# Wrapping up\n", "\n", "This notebook gave a brief introduction on text to speech models. The text to speech pipeline in txtai is designed to be easy to use and handles the most common text to speech tasks in English. \n", "\n", "This work is made possible by the excellent advancements in text to speech modeling. [ESPnet](https://github.com/espnet/espnet) is a great project and should be checked out for more advanced and a wider range of use cases. 
This pipeline was also made possible by the great work from [espnet_onnx](https://github.com/espnet/espnet_onnx) in building a framework to export models to ONNX.\n", "\n", "Looking forward to seeing what the community dreams up using this pipeline!\n", "\n" ] } ], "metadata": { "accelerator": "GPU", "colab": { "provenance": [] }, "gpuClass": "standard", "kernelspec": { "display_name": "Python 3", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: examples/41_Train_a_language_model_from_scratch.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "gpuClass": "standard", "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "source": [ "# Train a language model from scratch\n", "\n", "txtai has a robust training pipeline that can fine-tune large language models (LLMs) for downstream tasks such as labeling text. txtai also has the ability to train language models from scratch.\n", "\n", "The vast majority of the time, fine-tuning an LLM yields the best results. But when making significant changes to the structure of a model, training from scratch is often required.\n", "\n", "Examples of significant changes are:\n", "\n", "- Changing the vocabulary size\n", "- Changing the number of hidden dimensions\n", "- Changing the number of attention heads or layers\n", "- Creating a custom model architecture\n", "\n", "This notebook will show how to build a new tokenizer and train a small language model (known as a micromodel) from scratch.\n" ], "metadata": { "id": "-xU9P9iSR-Cy" } }, { "cell_type": "markdown", "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." 
], "metadata": { "id": "shlUi2kKS7KT" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "xEvX9vCpn4E0" }, "outputs": [], "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline-train] datasets sentence-transformers onnxruntime onnx" ] }, { "cell_type": "markdown", "source": [ "# Load dataset\n", "\n", "This example will use the `ag_news` dataset, which is a collection of news article headlines." ], "metadata": { "id": "408IyXzKFSiG" } }, { "cell_type": "code", "source": [ "from datasets import load_dataset\n", "\n", "dataset = load_dataset(\"ag_news\", split=\"train\")" ], "metadata": { "id": "IQ_ns6YvFRm1" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Train the tokenizer\n", "\n", "The first step is to train the tokenizer. We could use an existing tokenizer but in this case, we want a smaller vocabulary.\n" ], "metadata": { "id": "-vNVSA2FQnKj" } }, { "cell_type": "code", "source": [ "from transformers import AutoTokenizer\n", "\n", "def stream(batch=10000):\n", " for x in range(0, len(dataset), batch):\n", " yield dataset[x: x + batch][\"text\"]\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n", "tokenizer = tokenizer.train_new_from_iterator(stream(), vocab_size=500, length=len(dataset))\n", "tokenizer.model_max_length = 512\n", "\n", "tokenizer.save_pretrained(\"bert\")" ], "metadata": { "id": "LJ2FskiiQ_l_" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Let's test the tokenizer." 
], "metadata": { "id": "BOW5JlxYS3Rm" } }, { "cell_type": "code", "source": [ "print(tokenizer.tokenize(\"Red Sox defeat Yankees 5-3\"))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "slLtmzfbRuf6", "outputId": "2391b58b-7428-49a2-9225-0268a1f2ad64" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "['re', '##d', 'so', '##x', 'de', '##f', '##e', '##at', 'y', '##ank', '##e', '##es', '5', '-', '3']\n" ] } ] }, { "cell_type": "markdown", "source": [ "With a limited vocabulary size of 500, most words require multiple tokens. This limited vocabulary lowers the number of token representations the model needs to learn." ], "metadata": { "id": "IozRdXdEqegD" } }, { "cell_type": "markdown", "source": [ "# Train the language model\n", "\n", "Now it's time to train the model. We'll train a micromodel, which is an extremely small language model with a limited vocabulary. Micromodels, when paired with a limited vocabulary, have the potential to work in limited compute environments like edge devices and microcontrollers." 
], "metadata": { "id": "gqEBeBEoTrup" } }, { "cell_type": "code", "source": [ "from transformers import AutoTokenizer, BertConfig, BertForMaskedLM\n", "\n", "from txtai.pipeline import HFTrainer\n", "\n", "config = BertConfig(\n", " vocab_size = 500,\n", " hidden_size = 50,\n", " num_hidden_layers = 2,\n", " num_attention_heads = 2,\n", " intermediate_size = 100,\n", ")\n", "\n", "model = BertForMaskedLM(config)\n", "model.save_pretrained(\"bert\")\n", "tokenizer = AutoTokenizer.from_pretrained(\"bert\")\n", "\n", "train = HFTrainer()\n", "\n", "# Train model\n", "train((model, tokenizer), dataset, task=\"language-modeling\", output_dir=\"bert\",\n", " fp16=True, per_device_train_batch_size=128, num_train_epochs=10,\n", " dataloader_num_workers=2)" ], "metadata": { "id": "MEpZAr0TUMCK" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Sentence embeddings\n", "\n", "Next let's take the language model and fine-tune it to build sentence embeddings. " ], "metadata": { "id": "53bvB9c6MbPS" } }, { "cell_type": "code", "source": [ "%%capture\n", "!wget https://raw.githubusercontent.com/UKPLab/sentence-transformers/master/examples/training/nli/training_nli_v2.py\n", "!python training_nli_v2.py bert\n", "!mv output/* bert-nli" ], "metadata": { "id": "f11f5tjfS85m" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Embeddings search\n", "\n", "Now we'll build a txtai embeddings index using the fine-tuned model. We'll index the `ag_news` dataset. 
" ], "metadata": { "id": "FTOm5ofaMmcv" } }, { "cell_type": "code", "source": [ "from txtai.embeddings import Embeddings\n", "\n", "# Get list of all text\n", "texts = dataset[\"text\"]\n", "\n", "embeddings = Embeddings({\"path\": \"bert-nli\", \"content\": True})\n", "embeddings.index((x, text, None) for x, text in enumerate(texts))" ], "metadata": { "id": "_kKe5kRnVRhM" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Let's run a search and see how much the model has learned." ], "metadata": { "id": "Rh9yA6ZJM47H" } }, { "cell_type": "code", "source": [ "embeddings.search(\"Boston Red Sox Cardinals World Series\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "XCRhpfLmV1-q", "outputId": "662f1b1d-fcf5-4383-fd8a-369081a77501" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': '76733',\n", " 'text': 'Red Sox sweep Cardinals to win World Series The Boston Red Sox ended their 86-year championship drought with a 3-0 win over the St. Louis Cardinals in Game Four of the World Series.',\n", " 'score': 0.8008379936218262},\n", " {'id': '71169',\n", " 'text': 'Red Sox lead 2-0 over Cardinals of World Series The host Boston Red Sox scored a 6-2 victory over the St. Louis Cardinals, helped by Curt Schilling #39;s pitching through pain and seeping blood, in World Series Game 2 on Sunday night.',\n", " 'score': 0.7896029353141785},\n", " {'id': '70100',\n", " 'text': 'Sports: Red Sox 9 Cardinals 7 after 7 innings BOSTON Boston has scored twice in the seventh inning to take an 9-to-7 lead over the St. Louis Cardinals in the World Series opener at Fenway Park.',\n", " 'score': 0.7735188603401184}]" ] }, "metadata": {}, "execution_count": 49 } ] }, { "cell_type": "markdown", "source": [ "Not too bad. It's far from perfect but we can tell that it has some knowledge! 
This model was trained for 5 minutes, there is certainly room for improvement in training longer and/or with a larger dataset.\n", "\n", "The standard `bert-base-uncased` model has 110M parameters and is around 440MB. Let's see how many parameters this model has." ], "metadata": { "id": "M5Pk1spcM72L" } }, { "cell_type": "code", "source": [ "# Show number of parameters\n", "parameters = sum(p.numel() for p in embeddings.model.model.parameters())\n", "print(f\"Number of parameters:\\t\\t{parameters:,}\")\n", "print(f\"% of bert-base-uncased\\t\\t{(parameters / 110000000) * 100:.2f}%\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "RIEnDwuxeakq", "outputId": "131930f1-e6cd-4b23-b9dc-a62e670eb8e4" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Number of parameters:\t\t94,450\n", "% of bert-base-uncased\t\t0.09%\n" ] } ] }, { "cell_type": "code", "source": [ "!ls -lh bert-nli/pytorch_model.bin" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ATg4aInQeRN-", "outputId": "a6181fbc-ee6c-426e-882e-1d6cbefa22a0" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "-rw-r--r-- 1 root root 386K Jan 11 20:52 bert-nli/pytorch_model.bin\n" ] } ] }, { "cell_type": "markdown", "source": [ "This model is 386KB and has only 0.1% of the parameters. With proper vocabulary selection, a small language model has potential." ], "metadata": { "id": "7GfsF8ziNFJa" } }, { "cell_type": "markdown", "source": [ "# Quantization\n", "\n", "If 386KB isn't small enough, we can quantize the model to get it down even further. 
" ], "metadata": { "id": "CcbJNidNwuXt" } }, { "cell_type": "code", "source": [ "from txtai.pipeline import HFOnnx\n", "\n", "onnx = HFOnnx()\n", "onnx(\"bert-nli\", task=\"pooling\", output=\"bert-nli.onnx\", quantize=True)" ], "metadata": { "id": "IYZnex9kRcb0" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "embeddings = Embeddings({\"path\": \"bert-nli.onnx\", \"tokenizer\": \"bert-nli\", \"content\": True})\n", "embeddings.index((x, text, None) for x, text in enumerate(texts))\n", "embeddings.search(\"Boston Red Sox Cardinals World Series\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QL_1UosIVkZ7", "outputId": "6b2bedb8-5cc6-44b6-855a-617a6a07478c" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': '76733',\n", " 'text': 'Red Sox sweep Cardinals to win World Series The Boston Red Sox ended their 86-year championship drought with a 3-0 win over the St. Louis Cardinals in Game Four of the World Series.',\n", " 'score': 0.8008379936218262},\n", " {'id': '71169',\n", " 'text': 'Red Sox lead 2-0 over Cardinals of World Series The host Boston Red Sox scored a 6-2 victory over the St. Louis Cardinals, helped by Curt Schilling #39;s pitching through pain and seeping blood, in World Series Game 2 on Sunday night.',\n", " 'score': 0.7896029353141785},\n", " {'id': '70100',\n", " 'text': 'Sports: Red Sox 9 Cardinals 7 after 7 innings BOSTON Boston has scored twice in the seventh inning to take an 9-to-7 lead over the St. 
Louis Cardinals in the World Series opener at Fenway Park.',\n", " 'score': 0.7735188603401184}]" ] }, "metadata": {}, "execution_count": 50 } ] }, { "cell_type": "code", "source": [ "!ls -lh bert-nli.onnx" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "oCa3RDeInCkN", "outputId": "89cf379a-09cf-4fbc-8568-4d66085450f3" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "-rw-r--r-- 1 root root 187K Jan 11 20:53 bert-nli.onnx\n" ] } ] }, { "cell_type": "markdown", "source": [ "We're down to 187KB with a quantized model!\n" ], "metadata": { "id": "r95T1HZXVhnZ" } }, { "cell_type": "markdown", "source": [ "# Train on BERT dataset\n", "\n", "The [BERT paper](https://arxiv.org/abs/1810.04805) has all the information regarding training parameters and datasets used. Hugging Face Datasets hosts the `bookcorpus` and `wikipedia` datasets.\n", "\n", "Training on this size of a dataset is out of scope for this notebook but example code is shown below on how to build the BERT dataset.\n", "\n", "```python\n", "bookcorpus = load_dataset(\"bookcorpus\", split=\"train\")\n", "wiki = load_dataset(\"wikipedia\", \"20220301.en\", split=\"train\")\n", "wiki = wiki.remove_columns([col for col in wiki.column_names if col != \"text\"])\n", "dataset = concatenate_datasets([bookcorpus, wiki])\n", "```\n", "\n", "Then the same steps to train the tokenizer and model can be run. The dataset is 25GB compressed, so it will take some space and time to process!" ], "metadata": { "id": "aPaZsoxnYW8I" } }, { "cell_type": "markdown", "source": [ "# Wrapping up\n", "\n", "This notebook covered how to build micromodels from scratch with txtai. Micromodels can be fully rebuilt in hours using the most up-to-date knowledge available. If properly constructed, prepared and trained, micromodels have the potential to be a viable choice for limited resource environments. 
They can also help when realtime response is more important than having the highest accuracy scores.\n", "\n", "It's our hope that further research and exploration into micromodels leads to productive and useful models." ], "metadata": { "id": "4L8smyyXc8q8" } } ] } ================================================ FILE: examples/42_Prompt_driven_search_with_LLMs.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "vwELCooy4ljr" }, "source": [ "# Prompt-driven search with LLMs\n", "\n", "This notebook revisits the RAG pipeline, which has been covered in a number of previous notebooks. This pipeline is a combination of a similarity instance (embeddings or similarity pipeline) to build a question context and a model that answers questions.\n", "\n", "The RAG pipeline recently underwent a number of major upgrades to support the following.\n", "\n", "- Ability to run embeddings searches. Given that content is supported, text can be retrieved from the embeddings instance.\n", "- In addition to extractive qa, support text generation models, sequence to sequence models and custom pipelines\n", "\n", "These changes enable embeddings-guided and prompt-driven search with Large Language Models (LLMs) 🔥🔥🔥" ] }, { "cell_type": "markdown", "metadata": { "id": "ew7orE2O441o" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "LPQTb25tASIG" }, "outputs": [], "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai datasets" ] }, { "cell_type": "markdown", "metadata": { "id": "_YnqorRKAbLu" }, "source": [ "# Create Embeddings and RAG instances\n", "\n", "An Embeddings instance defines methods to represent text as vectors and build vector indexes for search.\n", "\n", "The RAG pipeline is a combination of a similarity instance (embeddings or similarity pipeline) to build a question context and a model that answers questions. The model can be a prompt-driven large language model (LLM), an extractive question-answering model or a custom pipeline.\n", "\n", "Let's run a basic example.\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "OUc9gqTyAYnm" }, "outputs": [], "source": [ "%%capture\n", "\n", "from txtai import Embeddings, RAG\n", "\n", "# Create embeddings model with content support\n", "embeddings = Embeddings(path=\"sentence-transformers/all-MiniLM-L6-v2\", content=True)\n", "\n", "# Create the RAG pipeline\n", "rag = RAG(embeddings, \"Qwen/Qwen3-4B-Instruct-2507\", template=\"\"\"\n", " Answer the following question using the provided context.\n", "\n", " Question:\n", " {question}\n", "\n", " Context:\n", " {context}\n", "\"\"\")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "4X5z3UjnAGe7", "outputId": "cacb7e9d-471a-437d-c68d-a8b51f876413" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "---- Red Sox - Blue Jays ----\n", "{'answer': 'The Blue Jays won the game.'}\n", "{'answer': 'The score was 2-1 in favor of the Blue Jays.'}\n", "\n", "---- Phillies - Braves ----\n", "{'answer': 'The Phillies won the game.'}\n", "{'answer': 'The score was 5-0 in favor of the Phillies.'}\n", "\n", "---- Dodgers - Giants ----\n", "{'answer': 'The Giants won the game.'}\n", "{'answer': 
'The score was Giants 5, Dodgers 4.'}\n", "\n", "---- Flyers - Lightning ----\n", "{'answer': 'The Flyers won the game.'}\n", "{'answer': 'The score was Flyers 4, Lightning 1.'}\n", "\n" ] } ], "source": [ "data = [\"Giants hit 3 HRs to down Dodgers\",\n", " \"Giants 5 Dodgers 4 final\",\n", " \"Dodgers drop Game 2 against the Giants, 5-4\",\n", " \"Blue Jays beat Red Sox final score 2-1\",\n", " \"Red Sox lost to the Blue Jays, 2-1\",\n", " \"Blue Jays at Red Sox is over. Score: 2-1\",\n", " \"Phillies win over the Braves, 5-0\",\n", " \"Phillies 5 Braves 0 final\",\n", " \"Final: Braves lose to the Phillies in the series opener, 5-0\",\n", " \"Lightning goaltender pulled, lose to Flyers 4-1\",\n", " \"Flyers 4 Lightning 1 final\",\n", " \"Flyers win 4-1\"]\n", "\n", "questions = [\"What team won the game?\", \"What was score?\"]\n", "\n", "for query in [\"Red Sox - Blue Jays\", \"Phillies - Braves\", \"Dodgers - Giants\", \"Flyers - Lightning\"]:\n", " print(\"----\", query, \"----\")\n", " for answer in rag([f\"{query} {x}\" for x in questions], data):\n", " print(answer)\n", " print()" ] }, { "cell_type": "markdown", "metadata": { "id": "7AnPvSeM3N1Z" }, "source": [ "This code runs a series of questions. First it runs an embeddings filtering query to find the most relevant text. For example, `Red Sox - Blue Jays` finds text related to those teams. Then `What team won the game?` and `What was the score?` are asked.\n", "\n", "This logic is the same logic found in Notebook 5 - Extractive QA with txtai but uses prompt-based QA vs extractive QA. " ] }, { "cell_type": "markdown", "metadata": { "id": "Aj8GoDk331cS" }, "source": [ "# Embeddings-guided and Prompt-driven Search\n", "\n", "Now for the fun stuff. Let's build an embeddings index for the `ag_news` dataset (a set of news stories from the mid 2000s). Then we'll use prompts to ask questions with embeddings results as the context." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "yL716oEZ43t-", "outputId": "23f4b0e7-a60a-4e89-fb57-06966e6612f8" }, "outputs": [ ], "source": [ "from datasets import load_dataset\n", "\n", "dataset = load_dataset(\"ag_news\", split=\"train\")\n", "\n", "# Create an embeddings index over the dataset\n", "embeddings = Embeddings(path=\"sentence-transformers/all-MiniLM-L6-v2\", content=True)\n", "embeddings.index(dataset[\"text\"])\n", "\n", "# Create RAG instance\n", "rag = RAG(embeddings, \"Qwen/Qwen3-4B-Instruct-2507\", template=\"\"\"\n", " Answer the following question using the provided context.\n", "\n", " Question:\n", " {question}\n", "\n", " Context:\n", " {context}\n", "\"\"\", output=\"flatten\")" ] }, { "cell_type": "markdown", "metadata": { "id": "Ifl8JwLDBL7k" }, "source": [ "Now let's run a prompt-driven search!" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5O1WBJ8153Mo", "outputId": "ddf09da2-7d4c-4fd3-b0da-df5631bacd13" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Who won the 2004 presidential election? George W. Bush won the 2004 presidential election.\n", "Who did the candidate beat? George W. Bush beat John F. Kerry in the 2004 presidential election.\n" ] } ], "source": [ "question = \"Who won the 2004 presidential election?\"\n", "answer = rag(question)\n", "print(question, answer)\n", "\n", "nquestion = \"Who did the candidate beat?\"\n", "print(nquestion, rag(f\"{question} {answer}. {nquestion}\"))" ] }, { "cell_type": "markdown", "metadata": { "id": "AhViFXH_BZSo" }, "source": [ "And there are the answers. Let's unpack how this works.\n", "\n", "The first thing the RAG pipeline does is run an embeddings search to find the most relevant text within the index. 
A context string is then built using those search results.\n", "\n", "After that, a prompt is generated, run and the answer printed." ] }, { "cell_type": "markdown", "metadata": { "id": "JtDcVPdOB0Rv" }, "source": [ "# Additional examples\n", "\n", "Before moving on, a couple more example questions." ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "0NNLBwC-83MM", "outputId": "4b9fabd7-baf9-4f7d-bb8b-c9c60c5101e4" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Who won the World Series in 2004? The Boston Red Sox won the World Series in 2004.\n", "What team did the Red Sox beat in the World Series? The Boston Red Sox beat the St. Louis Cardinals in the World Series.\n" ] } ], "source": [ "question = \"Who won the World Series in 2004?\"\n", "answer = rag(question)\n", "print(question, answer)\n", "\n", "nquestion = \"What team did the Red Sox beat in the World Series?\"\n", "print(nquestion, rag(f\"{question} {answer}. {nquestion}\"))" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 36 }, "id": "1P0zqkTW9cZW", "outputId": "4f2232db-761b-464f-b47d-6695c84ffb80" }, "outputs": [ { "data": { "text/plain": [ "'An interesting fact is that herrings communicate by farting—a quirky and unusual discovery that was honored with an Ig Nobel Prize for its oddball research.'" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rag(\"Tell me something interesting\")" ] }, { "cell_type": "markdown", "metadata": { "id": "ygFFcwWPGI9p" }, "source": [ "Whhaaaattt??? Is this a model hallucination?\n", "\n", "Let's run an embeddings query and see if that text is in the results." 
] }, { "cell_type": "code", "execution_count": 28, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "qZPhLqSxGMbK", "outputId": "e3b2909a-1ee8-480f-afa3-201a8e27cb08" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "herrings communicate by farting\n" ] } ], "source": [ "answer = \"herrings communicate by farting\"\n", "for x in embeddings.search(\"Tell me something interesting\"):\n", " if answer in x[\"text\"]:\n", " start = x[\"text\"].find(answer)\n", " print(x[\"text\"][start:start + len(answer)])" ] }, { "cell_type": "markdown", "metadata": { "id": "IpgxMc1DZcds" }, "source": [ "Sure enough it is 😃" ] }, { "cell_type": "markdown", "metadata": { "id": "KqfvCXp2B3li" }, "source": [ "# Wrapping up\n", "\n", "This notebook covered how to run embeddings-guided and prompt-driven search with LLMs. This functionality is a major step forward towards `Generative Semantic Search` for txtai. More to come, stay tuned!" ] } ], "metadata": { "accelerator": "GPU", "colab": { "provenance": [] }, "gpuClass": "standard", "kernelspec": { "display_name": "local", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.19" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: examples/43_Embeddings_in_the_Cloud.ipynb ================================================ [File too large to display: 19.4 KB] ================================================ FILE: examples/44_Prompt_templates_and_task_chains.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "vwELCooy4ljr" }, "source": [ "# Prompt templates and task chains\n", "\n", "txtai has long had support for workflows. 
Workflows connect the inputs and outputs of machine learning models together to create powerful transformation and processing functions.\n", "\n", "There has been a recent surge in interest in \"model prompting\", which is the process of building a natural language description of a task and passing it to a large language model (LLM). txtai has recently improved support for task templating, which builds string outputs from a set of parameters.\n", "\n", "This notebook demonstrates how txtai workflows can be used to apply prompt templates and chain those tasks together." ] }, { "cell_type": "markdown", "metadata": { "id": "ew7orE2O441o" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "LPQTb25tASIG" }, "outputs": [], "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[api]" ] }, { "cell_type": "markdown", "metadata": { "id": "_YnqorRKAbLu" }, "source": [ "# Prompt workflow\n", "\n", "First, we'll look at building a workflow with a series of model prompts. This workflow creates a conditional translation using a statement and target language. Another task reads that output text and detects the language.\n", "\n", "This workflow uses an LLM pipeline. The LLM pipeline loads a local model for inference, in this case [Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507). The [LLM pipeline](https://neuml.github.io/txtai/pipeline/llm/llm) supports local transformers models, llama.cpp models and LLM APIs such as Ollama, vLLM, OpenAI, Claude etc. \n", "\n", "It's important to note that a pipeline is simply a callable function. It can easily be replaced with a call to an external API." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "OUc9gqTyAYnm", "outputId": "83300311-736c-47c8-bc16-ec0303274054" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['French', 'German']\n" ] } ], "source": [ "from txtai import LLM, Workflow\n", "from txtai.workflow import TemplateTask\n", "\n", "# Create LLM\n", "llm = LLM(\"Qwen/Qwen3-4B-Instruct-2507\")\n", "\n", "# Define workflow or chaining of tasks together.\n", "workflow = Workflow([\n", " TemplateTask(\n", " template=\"Translate text '{statement}' to {language} if the text is English, otherwise keep the original text\",\n", " action=llm\n", " ),\n", " TemplateTask(\n", " template=\"What language is the following text. Only print the answer? {text}\",\n", " action=llm\n", " )\n", "])\n", "\n", "inputs = [\n", " {\"statement\": \"Hello, how are you\", \"language\": \"French\"},\n", " {\"statement\": \"Hallo, wie geht's dir\", \"language\": \"French\"}\n", "]\n", "\n", "print(list(workflow(inputs)))" ] }, { "cell_type": "markdown", "metadata": { "id": "_zz4Do8BV-Lk" }, "source": [ "Let's recap what happened here. The first workflow task conditionally translates text to a language if it's English.\n", "\n", "The first statement is `Hello, how are you` with a target language of French. So the statement is translated to French.\n", "\n", "The second statement is German, so it's not converted to French.\n", "\n", "The next step asks the model what the language is and it correctly prints `French` and `German`." ] }, { "cell_type": "markdown", "metadata": { "id": "iXDAKP4CX0W9" }, "source": [ "# Prompt Workflow as YAML\n", "\n", "The same workflow above can be created with YAML configuration." 
] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "GwV5A9xRYtYs", "outputId": "ffe6ee65-95a7-46c6-e6b9-5324eab26ca8" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Writing workflow.yml\n" ] } ], "source": [ "%%writefile workflow.yml\n", "\n", "llm:\n", " path: Qwen/Qwen3-4B-Instruct-2507\n", "\n", "workflow:\n", " chain:\n", " tasks:\n", " - task: template\n", " template: Translate text '{statement}' to {language} if the text is English, otherwise keep the original text\n", " action: llm\n", " - task: template\n", " template: What language is the following text. Only print the answer? {text}\n", " action: llm" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "dr7Lv5S5X98e", "outputId": "d6ac0427-671d-4525-aa21-664430109af3" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['French', 'German']\n" ] } ], "source": [ "from txtai import Application\n", "\n", "app = Application(\"workflow.yml\")\n", "print(list(app.workflow(\"chain\", inputs)))" ] }, { "cell_type": "markdown", "metadata": { "id": "EGqiV45fYVse" }, "source": [ "As expected, the same result! This is a matter of preference on how you want to create a workflow. One advantage of YAML workflows is that an API can easily be created from the workflow file." ] }, { "cell_type": "markdown", "metadata": { "id": "9PqMU0bNYinf" }, "source": [ "# Prompt Workflow via an API call\n", "\n", "Let's say you want the workflow to be available via an API call. Well good news, txtai has a built in API mechanism using FastAPI. 
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "vDxQj1ZIYsz3" }, "outputs": [], "source": [ "# Start an API service\n", "!CONFIG=workflow.yml nohup uvicorn \"txtai.api:app\" &> api.log &\n", "!sleep 60" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "R1o08SVtZW7h", "outputId": "99875acd-18a8-4c2c-ead3-cb6975a4b2d2" }, "outputs": [ { "data": { "text/plain": [ "['French', 'German']" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import requests\n", "\n", "# Run API request\n", "requests.post(\"http://localhost:8000/workflow\", json={\"name\": \"chain\", \"elements\": inputs}).json()" ] }, { "cell_type": "markdown", "metadata": { "id": "B88mCrGFl5W-" }, "source": [ "Just like the previous steps, except through an API call. Let's run via cURL for good measure." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "hRUyh0cQl_P2", "outputId": "9db8481d-0b6e-4a31-bdf6-5443df5f768a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[\"French\",\"German\"]" ] } ], "source": [ "%%bash\n", "\n", "curl -s -X POST \"http://localhost:8000/workflow\" \\\n", " -H \"Content-Type: application/json\" \\\n", " --data @- << EOF\n", "{\n", " \"name\": \"chain\",\n", " \"elements\": [\n", " {\"statement\": \"Hello, how are you\", \"language\": \"French\"},\n", " {\"statement\": \"Hallo, wie geht's dir\", \"language\": \"French\"}\n", " ]\n", "}\n", "EOF" ] }, { "cell_type": "markdown", "metadata": { "id": "W0zL93WPoaCo" }, "source": [ "One last time, the same output is shown.\n", "\n", "If your primary development environment isn't Python, txtai does have API bindings for [JavaScript](https://github.com/neuml/txtai.js), [Rust](https://github.com/neuml/txtai.rs), [Go](https://github.com/neuml/txtai.go) and 
[Java](https://github.com/neuml/txtai.java).\n", "\n", "More information on the API is available [here](https://neuml.github.io/txtai/api/)." ] }, { "cell_type": "markdown", "metadata": { "id": "q9WiFG6fpzw5" }, "source": [ "# Chat with your data\n", "\n", "\"Chat with your data\" is a popular entry point into the AI space. Let's run an example." ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rM3Y551LqF-J", "outputId": "85623785-c15f-4996-9460-0644f69cf5bf" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Writing search.yml\n" ] } ], "source": [ "%%writefile search.yml\n", "\n", "writable: false\n", "cloud:\n", " provider: huggingface-hub\n", " container: neuml/txtai-intro\n", "\n", "rag:\n", " path: Qwen/Qwen3-4B-Instruct-2507\n", " output: reference\n", " template: |\n", " Answer the following question using only the context below.\n", "\n", " Question: {question}\n", " Context: {context}\n", "\n", "workflow:\n", " search:\n", " tasks:\n", " - action: rag\n", " - task: template\n", " template: \"{answer}\\n\\nReference: {reference}\"\n", " rules:\n", " answer: I don't have data on that" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "1Elb8JANqpwX", "outputId": "b1f1ffa1-6c47-4d90-b6f1-8098d4dc45f8" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[\"Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg.\\n\\nReference: 1\"]\n" ] } ], "source": [ "app = Application(\"search.yml\")\n", "print(list(app.workflow(\"search\", [\"Find something about North America\"])))" ] }, { "cell_type": "markdown", "metadata": { "id": "4r49V4c9s5nf" }, "source": [ "The first thing the code above does is run an embeddings search to build a conversational context. That context is then used to build a prompt and inference is run against the LLM. 
\n", "\n", "The next task formats the outputs with a reference to the best matching record. In this case, it's only an id of 1. But this can be much more useful if the id is a URL or there is logic to format the id back to a unique reference string." ] }, { "cell_type": "markdown", "metadata": { "id": "KqfvCXp2B3li" }, "source": [ "# Wrapping up\n", "\n", "This notebook covered how to build prompt templates and task chains through a series of results. txtai has long had a robust and efficient workflow framework for connecting models together. This can be small and simple models and/or prompting with large models. Go ahead and give it a try!" ] } ], "metadata": { "accelerator": "GPU", "colab": { "provenance": [] }, "gpuClass": "standard", "kernelspec": { "display_name": "local", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.19" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: examples/45_Customize_your_own_embeddings_database.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "gpuClass": "standard", "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "source": [ "# Customize your own embeddings database\n", "\n", "txtai supports a number of different database and vector index backends, including external databases. With modern hardware, it's amazing how far a single node index can take us. Easily into the hundreds of millions and even billions of records.\n", "\n", "txtai provides maximum flexibility in creating your own embeddings database. Sensible defaults are used out of the box. 
So unless you seek out this configuration, it's not necessary. This notebook will explore the options available when you do want to customize your embeddings database.\n", "\n", "More on [embeddings configuration settings can be found here](https://neuml.github.io/txtai/embeddings/configuration). " ], "metadata": { "id": "-xU9P9iSR-Cy" } }, { "cell_type": "markdown", "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ], "metadata": { "id": "shlUi2kKS7KT" } }, { "cell_type": "code", "execution_count": 39, "metadata": { "id": "xEvX9vCpn4E0" }, "outputs": [], "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[database,similarity] datasets" ] }, { "cell_type": "markdown", "source": [ "# Load dataset\n", "\n", "This example will use the `ag_news` dataset, which is a collection of news article headlines. We'll use a subset of 25,000 headlines." ], "metadata": { "id": "408IyXzKFSiG" } }, { "cell_type": "code", "source": [ "import timeit\n", "\n", "from datasets import load_dataset\n", "\n", "def timer(embeddings, query=\"red sox\"):\n", " elapsed = timeit.timeit(lambda: embeddings.search(query), number=250)\n", " print(f\"{elapsed / 250} seconds per query\")\n", "\n", "dataset = load_dataset(\"ag_news\", split=\"train\")[\"text\"][:25000]" ], "metadata": { "id": "IQ_ns6YvFRm1" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# NumPy\n", "\n", "Let's start with the simplest possible embeddings database. This will just be a thin wrapper around vectorizing text with sentence-transformers, storing the results as a NumPy array and running similarity queries." 
], "metadata": { "id": "K15V3Sj_CvG7" } }, { "cell_type": "code", "source": [ "from txtai.embeddings import Embeddings\n", "\n", "# Create embeddings instance\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/all-MiniLM-L6-v2\", \"backend\": \"numpy\"})\n", "\n", "# Index data\n", "embeddings.index((x, text, None) for x, text in enumerate(dataset))" ], "metadata": { "id": "DMqiTrTbC-VJ" }, "execution_count": 41, "outputs": [] }, { "cell_type": "code", "source": [ "embeddings.search(\"red sox\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "hcAcJikVDMNQ", "outputId": "a587620a-5657-4082-84ee-3d73114e1d3a" }, "execution_count": 42, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[(19831, 0.6780003309249878),\n", " (18302, 0.6639199256896973),\n", " (16370, 0.6617192029953003)]" ] }, "metadata": {}, "execution_count": 42 } ] }, { "cell_type": "code", "source": [ "embeddings.info()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FQYx76IgMinE", "outputId": "9375bf2b-e641-4b01-cc8a-b400c5baf399" }, "execution_count": 43, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{\n", " \"backend\": \"numpy\",\n", " \"build\": {\n", " \"create\": \"2023-05-16T13:38:32Z\",\n", " \"python\": \"3.10.11\",\n", " \"settings\": {\n", " \"numpy\": \"1.22.4\"\n", " },\n", " \"system\": \"Linux (x86_64)\",\n", " \"txtai\": \"5.6.0\"\n", " },\n", " \"dimensions\": 384,\n", " \"offset\": 25000,\n", " \"path\": \"sentence-transformers/all-MiniLM-L6-v2\",\n", " \"update\": \"2023-05-16T13:38:32Z\"\n", "}\n" ] } ] }, { "cell_type": "markdown", "source": [ "The embeddings instance above vectorizes the text and stores the content as a NumPy array. Array index positions are returned with similarity scores. While the same can easily be done using sentence-transformers, using the txtai framework makes it easy to swap out different options as seen next." 
], "metadata": { "id": "NkHMOoE9L4Nw" } }, { "cell_type": "markdown", "source": [ "# SQLite and NumPy\n", "\n", "The next combination we'll test is a SQLite database with a NumPy array." ], "metadata": { "id": "AtEdP7Utw3mk" } }, { "cell_type": "code", "source": [ "# Create embeddings instance\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/all-MiniLM-L6-v2\", \"content\": \"sqlite\", \"backend\": \"numpy\"})\n", "\n", "# Index data\n", "embeddings.index((x, text, None) for x, text in enumerate(dataset))" ], "metadata": { "id": "DPWrubv5oOn7" }, "execution_count": 44, "outputs": [] }, { "cell_type": "markdown", "source": [ "Now let's run a search." ], "metadata": { "id": "SDaDLMyXLGe1" } }, { "cell_type": "code", "source": [ "embeddings.search(\"red sox\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ILSfWHxVHex0", "outputId": "f4cc3f44-da63-4be9-9187-58386fad58df" }, "execution_count": 45, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': '19831',\n", " 'text': 'Boston Red Sox Team Report - September 6 (Sports Network) - Two of the top teams in the American League tangle in a possible American League Division Series preview tonight, as the West-leading Oakland Athletics host the wild card-leading Boston Red Sox for the first of a three-game set at the ',\n", " 'score': 0.6780003309249878},\n", " {'id': '18302',\n", " 'text': 'BASEBALL: RED-HOT SOX CLIP THE ANGELS #39; WINGS BOSTON RED SOX fans are enjoying their best week of the season. 
While their beloved team swept wild-card rivals Anaheim in a three-game series to establish a nine-game winning streak, the hated New York Yankees endured the heaviest loss in their history.',\n", " 'score': 0.6639199256896973},\n", " {'id': '16370',\n", " 'text': 'Boston Red Sox Team Report - September 1 (Sports Network) - The red-hot Boston Red Sox hope to continue rolling as they continue their three-game set with the Anaheim Angels this evening at Fenway Park.',\n", " 'score': 0.6617192029953003}]" ] }, "metadata": {}, "execution_count": 45 } ] }, { "cell_type": "code", "source": [ "embeddings.info()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "0IuVqFxUMwe8", "outputId": "fa13d767-d549-4b9c-f63e-7e09b05159ee" }, "execution_count": 46, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{\n", " \"backend\": \"numpy\",\n", " \"build\": {\n", " \"create\": \"2023-05-16T13:38:52Z\",\n", " \"python\": \"3.10.11\",\n", " \"settings\": {\n", " \"numpy\": \"1.22.4\"\n", " },\n", " \"system\": \"Linux (x86_64)\",\n", " \"txtai\": \"5.6.0\"\n", " },\n", " \"content\": \"sqlite\",\n", " \"dimensions\": 384,\n", " \"offset\": 25000,\n", " \"path\": \"sentence-transformers/all-MiniLM-L6-v2\",\n", " \"update\": \"2023-05-16T13:38:52Z\"\n", "}\n" ] } ] }, { "cell_type": "markdown", "source": [ "Same results as before. The only difference is the content is now available via the associated SQLite database. \n", "\n", "Let's inspect the ANN object to see how it looks. 
" ], "metadata": { "id": "B_XnpIpXNKSP" } }, { "cell_type": "code", "source": [ "print(embeddings.ann.backend.shape)\n", "print(type(embeddings.ann.backend))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "c2FVQlxSLKgP", "outputId": "1b8e454d-c5f7-4d58-b2b0-a1a9eaca4b9b" }, "execution_count": 47, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "(25000, 384)\n", "\n" ] } ] }, { "cell_type": "markdown", "source": [ "As expected, it's a NumPy array. Let's calculate how long a search query takes to execute.\n" ], "metadata": { "id": "00dnum6fNNM0" } }, { "cell_type": "code", "source": [ "timer(embeddings)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JvodDi4w6JxS", "outputId": "3671753d-0dd1-47fe-bef6-04841204cb6b" }, "execution_count": 48, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.028768999292000445 seconds per query\n" ] } ] }, { "cell_type": "markdown", "source": [ "Not too bad at all!\n", "\n" ], "metadata": { "id": "eqom3l_87jFv" } }, { "cell_type": "markdown", "source": [ "# SQLite and PyTorch\n", "\n", "Let's now try a PyTorch backend." ], "metadata": { "id": "Y54lSbQd5rzy" } }, { "cell_type": "code", "source": [ "# Create embeddings instance\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/all-MiniLM-L6-v2\", \"content\": \"sqlite\", \"backend\": \"torch\"})\n", "\n", "# Index data\n", "embeddings.index((x, text, None) for x, text in enumerate(dataset))" ], "metadata": { "id": "OYAqPoTmNaNN" }, "execution_count": 49, "outputs": [] }, { "cell_type": "markdown", "source": [ "Let's run a search again." 
], "metadata": { "id": "DT52loQU7zmt" } }, { "cell_type": "code", "source": [ "embeddings.search(\"red sox\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "zvlrEunM7vi4", "outputId": "e591495f-0622-4291-fbbd-cbe655f92a07" }, "execution_count": 50, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': '19831',\n", " 'text': 'Boston Red Sox Team Report - September 6 (Sports Network) - Two of the top teams in the American League tangle in a possible American League Division Series preview tonight, as the West-leading Oakland Athletics host the wild card-leading Boston Red Sox for the first of a three-game set at the ',\n", " 'score': 0.678000271320343},\n", " {'id': '18302',\n", " 'text': 'BASEBALL: RED-HOT SOX CLIP THE ANGELS #39; WINGS BOSTON RED SOX fans are enjoying their best week of the season. While their beloved team swept wild-card rivals Anaheim in a three-game series to establish a nine-game winning streak, the hated New York Yankees endured the heaviest loss in their history.',\n", " 'score': 0.6639199256896973},\n", " {'id': '16370',\n", " 'text': 'Boston Red Sox Team Report - September 1 (Sports Network) - The red-hot Boston Red Sox hope to continue rolling as they continue their three-game set with the Anaheim Angels this evening at Fenway Park.',\n", " 'score': 0.6617191433906555}]" ] }, "metadata": {}, "execution_count": 50 } ] }, { "cell_type": "code", "source": [ "embeddings.info()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QmJZb56SM6Up", "outputId": "a6e9b73a-6248-43ca-a57f-c3a4fec9112f" }, "execution_count": 51, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{\n", " \"backend\": \"torch\",\n", " \"build\": {\n", " \"create\": \"2023-05-16T13:39:19Z\",\n", " \"python\": \"3.10.11\",\n", " \"settings\": {\n", " \"torch\": \"2.0.0+cu118\"\n", " },\n", " \"system\": \"Linux (x86_64)\",\n", " \"txtai\": \"5.6.0\"\n", " },\n", " 
\"content\": \"sqlite\",\n", " \"dimensions\": 384,\n", " \"offset\": 25000,\n", " \"path\": \"sentence-transformers/all-MiniLM-L6-v2\",\n", " \"update\": \"2023-05-16T13:39:19Z\"\n", "}\n" ] } ] }, { "cell_type": "markdown", "source": [ "And once again inspect the ANN object." ], "metadata": { "id": "jqdMjDiO8Dy3" } }, { "cell_type": "code", "source": [ "print(embeddings.ann.backend.shape)\n", "print(type(embeddings.ann.backend))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "u5JFEJ-q5Zow", "outputId": "fffb3e3d-2d5c-4f20-877a-cce52836e5f3" }, "execution_count": 52, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "torch.Size([25000, 384])\n", "\n" ] } ] }, { "cell_type": "markdown", "source": [ "As expected, this time the backend is a Torch tensor. Next we'll calculate the average search time." ], "metadata": { "id": "jGwHvEHE6ALO" } }, { "cell_type": "code", "source": [ "timer(embeddings)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7aAI_jUm6goL", "outputId": "e4ec71dc-c1d8-40ff-9e68-113a5da1ad1e" }, "execution_count": 53, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.0198183048359997 seconds per query\n" ] } ] }, { "cell_type": "markdown", "source": [ "A bit faster since Torch uses the GPU to compute the similarity matrix." ], "metadata": { "id": "mp3nLHz38OIp" } }, { "cell_type": "markdown", "source": [ "# SQLite and Faiss\n", "\n", "Now let's run the same code with the standard txtai settings of Faiss + SQLite." 
], "metadata": { "id": "8h3SXoGr9YIL" } }, { "cell_type": "code", "source": [ "# Create embeddings instance\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/all-MiniLM-L6-v2\", \"content\": True})\n", "\n", "# Index data\n", "embeddings.index((x, text, None) for x, text in enumerate(dataset))" ], "metadata": { "id": "DQECU7y-9doj" }, "execution_count": 54, "outputs": [] }, { "cell_type": "code", "source": [ "embeddings.search(\"red sox\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wGqcmE6M9kJW", "outputId": "25c7ef4c-a437-4d64-ebaf-1481d0873ca1" }, "execution_count": 55, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': '19831',\n", " 'text': 'Boston Red Sox Team Report - September 6 (Sports Network) - Two of the top teams in the American League tangle in a possible American League Division Series preview tonight, as the West-leading Oakland Athletics host the wild card-leading Boston Red Sox for the first of a three-game set at the ',\n", " 'score': 0.6780003309249878},\n", " {'id': '18302',\n", " 'text': 'BASEBALL: RED-HOT SOX CLIP THE ANGELS #39; WINGS BOSTON RED SOX fans are enjoying their best week of the season. 
While their beloved team swept wild-card rivals Anaheim in a three-game series to establish a nine-game winning streak, the hated New York Yankees endured the heaviest loss in their history.',\n", " 'score': 0.6639199256896973},\n", " {'id': '16370',\n", " 'text': 'Boston Red Sox Team Report - September 1 (Sports Network) - The red-hot Boston Red Sox hope to continue rolling as they continue their three-game set with the Anaheim Angels this evening at Fenway Park.',\n", " 'score': 0.6617192029953003}]" ] }, "metadata": {}, "execution_count": 55 } ] }, { "cell_type": "code", "source": [ "embeddings.info()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "zDxqlMT9d-Q3", "outputId": "befdb0a1-01b4-4948-94e2-4570efada3ce" }, "execution_count": 56, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{\n", " \"backend\": \"faiss\",\n", " \"build\": {\n", " \"create\": \"2023-05-16T13:39:47Z\",\n", " \"python\": \"3.10.11\",\n", " \"settings\": {\n", " \"components\": \"IVF632,Flat\"\n", " },\n", " \"system\": \"Linux (x86_64)\",\n", " \"txtai\": \"5.6.0\"\n", " },\n", " \"content\": true,\n", " \"dimensions\": 384,\n", " \"offset\": 25000,\n", " \"path\": \"sentence-transformers/all-MiniLM-L6-v2\",\n", " \"update\": \"2023-05-16T13:39:47Z\"\n", "}\n" ] } ] }, { "cell_type": "code", "source": [ "timer(embeddings)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "zlbc43qg9qKb", "outputId": "ae06092c-ff5f-4540-afd5-e054db6ffb84" }, "execution_count": 57, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.00825659705599992 seconds per query\n" ] } ] }, { "cell_type": "markdown", "source": [ "Everything lines up with the previous examples. Note that Faiss is faster, given it's a vector index. For 25,000 records, the difference is negligible but vector index performance increases rapidly for datasets in the million+ range." 
], "metadata": { "id": "j5u1GEbV91GH" } }, { "cell_type": "markdown", "source": [ "# SQLite and HNSW\n", "\n", "While txtai strives to keep things as simple as possible with many common default settings out of the box, customizing the backend options can lead to increased performance. The next example will store vectors in a HNSW index and customize the index options." ], "metadata": { "id": "f4Hnjfy--ye0" } }, { "cell_type": "code", "source": [ "# Create embeddings instance\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/all-MiniLM-L6-v2\", \"content\": True, \"backend\": \"hnsw\", \"hnsw\": {\"m\": 32}})\n", "\n", "# Index data\n", "embeddings.index((x, text, None) for x, text in enumerate(dataset))" ], "metadata": { "id": "5dqxj2hr_ICl" }, "execution_count": 58, "outputs": [] }, { "cell_type": "code", "source": [ "embeddings.search(\"red sox\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pUw-3WCHFGf9", "outputId": "22e827a9-ed34-4847-b41b-70499f85cff0" }, "execution_count": 59, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': '19831',\n", " 'text': 'Boston Red Sox Team Report - September 6 (Sports Network) - Two of the top teams in the American League tangle in a possible American League Division Series preview tonight, as the West-leading Oakland Athletics host the wild card-leading Boston Red Sox for the first of a three-game set at the ',\n", " 'score': 0.678000271320343},\n", " {'id': '18302',\n", " 'text': 'BASEBALL: RED-HOT SOX CLIP THE ANGELS #39; WINGS BOSTON RED SOX fans are enjoying their best week of the season. 
While their beloved team swept wild-card rivals Anaheim in a three-game series to establish a nine-game winning streak, the hated New York Yankees endured the heaviest loss in their history.',\n", " 'score': 0.6639199256896973},\n", " {'id': '16370',\n", " 'text': 'Boston Red Sox Team Report - September 1 (Sports Network) - The red-hot Boston Red Sox hope to continue rolling as they continue their three-game set with the Anaheim Angels this evening at Fenway Park.',\n", " 'score': 0.6617191433906555}]" ] }, "metadata": {}, "execution_count": 59 } ] }, { "cell_type": "code", "source": [ "embeddings.info()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "u5DAfB1MeCLF", "outputId": "d00dec7f-e1da-49f1-afd3-aa852238769f" }, "execution_count": 60, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{\n", " \"backend\": \"hnsw\",\n", " \"build\": {\n", " \"create\": \"2023-05-16T13:40:21Z\",\n", " \"python\": \"3.10.11\",\n", " \"settings\": {\n", " \"efconstruction\": 200,\n", " \"m\": 32,\n", " \"seed\": 100\n", " },\n", " \"system\": \"Linux (x86_64)\",\n", " \"txtai\": \"5.6.0\"\n", " },\n", " \"content\": true,\n", " \"deletes\": 0,\n", " \"dimensions\": 384,\n", " \"hnsw\": {\n", " \"m\": 32\n", " },\n", " \"metric\": \"ip\",\n", " \"offset\": 25000,\n", " \"path\": \"sentence-transformers/all-MiniLM-L6-v2\",\n", " \"update\": \"2023-05-16T13:40:21Z\"\n", "}\n" ] } ] }, { "cell_type": "code", "source": [ "timer(embeddings)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Z3Qg6EEjFIom", "outputId": "49d01a91-5d53-4792-8244-316b6e79cc20" }, "execution_count": 61, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.006280380824000531 seconds per query\n" ] } ] }, { "cell_type": "markdown", "source": [ "Once again, everything matches up with the previous examples. 
There is a negligible performance difference vs Faiss.\n", "\n", "Hnswlib powers a number of popular vector databases. It's definitely an option worth evaluating." ], "metadata": { "id": "JREMWY5NHAX-" } }, { "cell_type": "markdown", "source": [ "# External Vectorization\n", "\n", "txtai has a number of built-in vectorizers backed by Hugging Face Transformers and Sentence Transformers. Just like other txtai modules, vectorization can also be customized.\n", "\n", "The next example uses the Hugging Face Inference API to vectorize text.\n" ], "metadata": { "id": "Wuj_gZeBs57O" } }, { "cell_type": "code", "source": [ "import numpy as np\n", "import requests\n", "\n", "BASE = \"https://api-inference.huggingface.co/pipeline/feature-extraction\"\n", "\n", "def transform(inputs):\n", " # Your API provider of choice\n", " response = requests.post(f\"{BASE}/sentence-transformers/all-MiniLM-L6-v2\", json={\"inputs\": inputs})\n", " return np.array(response.json(), dtype=np.float32)\n", "\n", "embeddings = Embeddings({\"transform\": transform, \"backend\": \"numpy\", \"content\": True})\n", "embeddings.index([(0, \"sunny\", None), (1, \"rainy\", None)])\n", "embeddings.search(\"nice day\") " ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FAhg1TcdtWIJ", "outputId": "22e34bd0-b6d3-40b2-8948-d08397d744f5" }, "execution_count": 62, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[{'id': '0', 'text': 'sunny', 'score': 0.28077083826065063},\n", " {'id': '1', 'text': 'rainy', 'score': 0.18051263689994812}]" ] }, "metadata": {}, "execution_count": 62 } ] }, { "cell_type": "markdown", "source": [ "# Configuration storage\n", "\n", "Configuration is passed to an embeddings instance as a dictionary. When saving an embeddings instance, the default behavior is to save configuration as a pickled object. JSON can alternatively be used." 
], "metadata": { "id": "RvHkAloSl4y3" } }, { "cell_type": "code", "source": [ "# Create embeddings instance\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/all-MiniLM-L6-v2\", \"content\": True, \"format\": \"json\"})\n", "\n", "# Index data\n", "embeddings.index((x, text, None) for x, text in enumerate(dataset))\n", "\n", "# Save embeddings\n", "embeddings.save(\"index\")\n", "\n", "!cat index/config.json" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "X1DYuyPmmSgU", "outputId": "975c0f7f-f38e-478e-a8b8-16155f1f4865" }, "execution_count": 63, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{\n", " \"path\": \"sentence-transformers/all-MiniLM-L6-v2\",\n", " \"content\": true,\n", " \"format\": \"json\",\n", " \"dimensions\": 384,\n", " \"backend\": \"faiss\",\n", " \"offset\": 25000,\n", " \"build\": {\n", " \"create\": \"2023-05-16T13:40:49Z\",\n", " \"python\": \"3.10.11\",\n", " \"settings\": {\n", " \"components\": \"IVF632,Flat\"\n", " },\n", " \"system\": \"Linux (x86_64)\",\n", " \"txtai\": \"5.6.0\"\n", " },\n", " \"update\": \"2023-05-16T13:40:49Z\"\n", "}" ] } ] }, { "cell_type": "markdown", "source": [ "Looking at the stored configuration, it's almost identical to an `embeddings.info()` call. This is by design, JSON configuration is designed to be human-readable. This is a good option when sharing an embeddings database on the [Hugging Face Hub](https://huggingface.co/models)." ], "metadata": { "id": "ETdcrP7dqI8J" } }, { "cell_type": "markdown", "source": [ "# SQLite vs DuckDB\n", "\n", "The last thing we'll explore is the database backend.\n", "\n", "[SQLite](https://sqlite.org/index.html) is a row-oriented database, [DuckDB](https://duckdb.org/) is column-oriented. This design difference is important to note and a factor to consider when evaluating the expected workload. Let's explore." 
], "metadata": { "id": "z6zmhGRVHawG" } }, { "cell_type": "code", "source": [ "# Create embeddings instance\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/all-MiniLM-L6-v2\", \"content\": \"sqlite\"})\n", "\n", "# Index data\n", "embeddings.index((x, text, None) for x, text in enumerate(dataset))" ], "metadata": { "id": "KZ-x_53SHsNK" }, "execution_count": 64, "outputs": [] }, { "cell_type": "code", "source": [ "timer(embeddings, \"SELECT text FROM txtai where id = 3980\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "LQZCGu-9H70K", "outputId": "06e41f55-687d-4d98-e194-13efdd2991ef" }, "execution_count": 65, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.00012401376399975562 seconds per query\n" ] } ] }, { "cell_type": "code", "source": [ "timer(embeddings, \"SELECT count(*), text FROM txtai group by text order by count(*) desc\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SwgR2TwPHvdP", "outputId": "d88a457a-7789-4881-9420-914e2992d132" }, "execution_count": 66, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.03863514600000053 seconds per query\n" ] } ] }, { "cell_type": "code", "source": [ "# Create embeddings instance\n", "embeddings = Embeddings({\"path\": \"sentence-transformers/all-MiniLM-L6-v2\", \"content\": \"duckdb\"})\n", "\n", "# Index data\n", "embeddings.index((x, text, None) for x, text in enumerate(dataset))" ], "metadata": { "id": "ZdrLBOmaIKbF" }, "execution_count": 67, "outputs": [] }, { "cell_type": "code", "source": [ "timer(embeddings, \"SELECT text FROM txtai where id = 3980\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "cce1PAVqIMpU", "outputId": "51b8749b-7a87-42d0-af17-2710f8460c68" }, "execution_count": 68, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.0038918176440001844 seconds per query\n" ] } ] }, { "cell_type": "code", "source": [ "timer(embeddings, 
\"SELECT count(*), text FROM txtai group by text order by count(*) desc\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lxBfoh3TINmE", "outputId": "f56ed46b-abb6-4f6f-e986-10ec570e8cbe" }, "execution_count": 69, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.0198518766039997 seconds per query\n" ] } ] }, { "cell_type": "markdown", "source": [ "While the dataset of 25,000 rows is small, we can start to see the differences. SQLite has a much faster single row retrieval time. DuckDB does better with an aggregate query. This is a product of a row-oriented vs column oriented database and a factor to consider when developing a solution." ], "metadata": { "id": "_hBm-yZTJtUQ" } }, { "cell_type": "markdown", "source": [ "# Wrapping up\n", "\n", "This notebook explored different combinations of database and vector index backends. With modern hardware, it's amazing how far a single node index can take us. Easily into the hundreds of millions and even billions of records. When a hardware bottleneck becomes an issue, external vector databases are one option to consider. Another is [building a distributed txtai embeddings cluster](https://neuml.github.io/txtai/api/cluster/).\n", "\n", "There is power in simplicity. Many paid services try to convince us that signing up for an API account is the best place to start. In some cases, such as teams with very few to no developers, this is true. But for teams with developers, options like txtai should be evaluated." 
], "metadata": { "id": "4L8smyyXc8q8" } } ] } ================================================ FILE: examples/46_Whats_new_in_txtai_6_0.ipynb ================================================ [File too large to display: 28.2 KB] ================================================ FILE: examples/47_Building_an_efficient_sparse_keyword_index_in_Python.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "# Building an efficient sparse keyword index in Python\n", "\n", "Semantic search is a new category of search built on recent advances in Natural Language Processing (NLP). Traditional search systems use keywords to find data. Semantic search has an understanding of natural language and identifies results that have the same meaning, not necessarily the same keywords.\n", "\n", "While semantic search adds amazing capabilities, sparse keyword indexes can still add value. There may be cases where finding an exact match is important or we just want a fast index to quickly do an initial scan of a dataset.\n", "\n", "Unfortunately, there aren't a ton of great options for a local Python-based keyword index library. Most of the options available don't scale and/or are highly inefficient, designed only for simple situations.\n", "\n", "Given that Python is an interpreted language, it often gets a bad rap from a performance standpoint. In some cases, it's justified as Python can be memory hungry and has a global interpreter lock (GIL) that forces single thread execution. But it is possible to build performant Python on par with other languages.\n", "\n", "This notebook will explore how to build an efficient sparse keyword index in Python and compare the results with other approaches." 
], "metadata": { "id": "v4J3FxbUn9CT" } }, { "cell_type": "markdown", "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies." ], "metadata": { "id": "W70a-UjTdDiA" } }, { "cell_type": "code", "source": [ "%%capture\n", "!pip install txtai pytrec_eval rank-bm25 elasticsearch==7.10.1\n", "!pip uninstall -y tensorflow" ], "metadata": { "id": "nfgwb14J4LO2" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Introducing the problem\n", "\n", "At a high level, keyword indexes work by tokenizing text into lists of tokens per document. These tokens are aggregated into frequencies per document and stored in term frequency sparse arrays.\n", "\n", "The term frequency arrays are sparse given that they only store a frequency when the token exists in a document. For example, if a token exists in 1 of 1000 documents, the sparse array only has a single entry. A dense array stores 1000 entries all with zeros except for one.\n", "\n", "One simple approach to store a term frequency sparse array in Python would be having a dictionary of `{id: frequency}` per token. The problem with this approach is that Python has significant object overhead.\n", "\n", "Let's inspect the size used for a single number." ], "metadata": { "id": "vF3hlZGkqMlh" } }, { "cell_type": "code", "source": [ "import sys\n", "\n", "a = 100\n", "sys.getsizeof(a)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nGX8FMTcqn2c", "outputId": "8580d98b-1901-49a5-d81c-8a2827257d3c" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "28" ] }, "metadata": {}, "execution_count": 2 } ] }, { "cell_type": "markdown", "source": [ "28 bytes for a single integer. Compared to a native int/long which is 4 or 8 bytes, this is quite wasteful. Imagine having thousands of `id: frequency` mappings. Memory usage will grow fast.\n", "\n", "Let's demonstrate. 
The code below runs a self-contained Python process that creates a list of 10 million numbers.\n", "\n", "Running as a separate process helps calculate more accurate memory usage stats." ], "metadata": { "id": "99ixudd1q7jB" } }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "e_cBVXU-jYDQ", "outputId": "ba6e00a3-6acb-4f2e-c985-719eb0d1d36e" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Writing arrays.py\n" ] } ], "source": [ "%%writefile arrays.py\n", "import psutil\n", "\n", "results = []\n", "for x in range(int(1e7)):\n", " results.append(x)\n", "\n", "print(f\"MEMORY USAGE = {psutil.Process().memory_info().rss / (1024 * 1024)} MB\")" ] }, { "cell_type": "code", "source": [ "!python arrays.py" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "IJaAjPbGnGya", "outputId": "adab040c-b7de-4d2f-83dc-5ec0f867a993" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "MEMORY USAGE = 394.640625 MB\n" ] } ] }, { "cell_type": "markdown", "source": [ "Approximately 395 MB of memory is used for this array. That seems high." ], "metadata": { "id": "bRrR8eOlvBRD" } }, { "cell_type": "markdown", "source": [ "# Efficient numeric arrays in Python\n", "\n", "Fortunately, Python has a module for building [efficient arrays of numeric values](https://docs.python.org/3/library/array.html). This module enables building arrays with the same native type.\n", "\n", "Let's try doing that with a `long long` type, which takes 8 bytes."
], "metadata": { "id": "C1fA62VGwLya" } }, { "cell_type": "code", "source": [ "%%writefile arrays.py\n", "from array import array\n", "\n", "import psutil\n", "\n", "results = array(\"q\")\n", "for x in range(int(1e7)):\n", " results.append(x)\n", "\n", "print(f\"MEMORY USAGE = {psutil.Process().memory_info().rss / (1024 * 1024)} MB\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "bqtUmEj4kjOQ", "outputId": "736c3780-fed3-458c-be38-49feb42f416b" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Overwriting arrays.py\n" ] } ] }, { "cell_type": "code", "source": [ "!python arrays.py" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5lgHsgQqnI5q", "outputId": "726c596b-9896-4e8d-baa7-04cb8892bdea" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "MEMORY USAGE = 88.54296875 MB\n" ] } ] }, { "cell_type": "markdown", "source": [ "As we can see, memory usage went from 395 MB to 89 MB. That's a 4x reduction which is in line with the earlier calculation of 28 bytes/number vs 8 bytes/number." ], "metadata": { "id": "HS_uKPRhv2mV" } }, { "cell_type": "markdown", "source": [ "# Efficient processing of numeric data\n", "\n", "Large computations in pure Python can also be painfully slow. Luckily, there is a robust landscape of options for numeric processing. The most popular framework is [NumPy](https://github.com/numpy/numpy). There is also [PyTorch](https://github.com/pytorch/pytorch) and other GPU-based tensor processing frameworks.\n", "\n", "Below is a simple example that sorts an array in Python vs NumPy to demonstrate."
], "metadata": { "id": "vGEjWEaGwX9s" } }, { "cell_type": "code", "source": [ "import random\n", "import time\n", "\n", "data = [random.randint(1, 500) for x in range(1000000)]\n", "\n", "start = time.time()\n", "sorted(data, reverse=True)\n", "print(time.time() - start)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Ggln2VOSw5tY", "outputId": "6bcbf8b2-01b1-41a3-bcad-f73f7220ca17" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.33922290802001953\n" ] } ] }, { "cell_type": "code", "source": [ "import numpy as np\n", "\n", "data = np.array(data)\n", "\n", "start = time.time()\n", "np.sort(data)[::-1]\n", "print(time.time() - start)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "IV-FhI4UxkV2", "outputId": "ac3d33d1-c8e3-4ae0-f6fa-fd619e81dd5a" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.10296249389648438\n" ] } ] }, { "cell_type": "markdown", "source": [ "As we can see, sorting an array in NumPy is significantly faster. It might not seem like a lot but this adds up when run in bulk." ], "metadata": { "id": "sejHtgn1zBjk" } }, { "cell_type": "markdown", "source": [ "# Sparse keyword indexes in txtai\n", "\n", "Now that we've discussed the key performance concepts, let's talk about how to apply this to building sparse keyword indexes.\n", "\n", "Going back to the original approach for a term frequency sparse array, we see that using the Python array package is more efficient. In txtai, this method is used to build term frequency arrays for each token. This results in near native speed and memory usage.\n", "\n", "The search method uses a number of NumPy methods to efficiently calculate query term matches. Each query is tokenized and those token term frequency arrays are retrieved to calculate query scores. These NumPy methods are all written in C and often drop the GIL. 
So once again, near native speed and the ability to use multithreading.\n", "\n", "Read the [full implementation on GitHub](https://github.com/neuml/txtai/blob/master/src/python/txtai/scoring/terms.py) to learn more.\n" ], "metadata": { "id": "gPeTqCflzP5B" } }, { "cell_type": "markdown", "source": [ "# Evaluating performance\n", "\n", "First, a review of the landscape. As said in the introduction, there aren't a ton of good options. [Apache Lucene](https://github.com/apache/lucene) is by far the best traditional search index from a speed, performance and functionality standpoint. It's the base for Elasticsearch/OpenSearch and many other projects. But it requires Java.\n", "\n", "Here are the options we'll explore.\n", "\n", "- [Rank-BM25](https://github.com/dorianbrown/rank_bm25) project, the top result when searching for `python bm25`.\n", "\n", "- [SQLite FTS5](https://www.sqlite.org/fts5.html) extension. This extension builds a sparse keyword index right in SQLite.\n", "\n", "We'll use the BEIR dataset. We'll also use a [benchmarks script](https://raw.githubusercontent.com/neuml/txtai/master/examples/benchmarks.py) from the txtai project. This benchmarks script has methods to work with the BEIR dataset.\n", "\n", "A couple of important caveats on the benchmarks script.\n", "\n", "- For the SQLite FTS implementation, each token is joined together with an `OR` clause. SQLite FTS [implicitly joins clauses together](https://www.sqlite.org/fts5.html) with `AND` clauses by default.
By contrast, [Lucene's default operator](https://lucene.apache.org/core/9_7_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Boolean_operators) is an `OR`.\n", "- The Elasticsearch implementation uses 7.x as it's simpler to instantiate in a notebook.\n", "- All methods except Elasticsearch use txtai's [unicode tokenizer](https://github.com/neuml/txtai/blob/master/src/python/txtai/pipeline/data/tokenizer.py) to tokenize text for consistency" ], "metadata": { "id": "rKCRLFNh39hV" } }, { "cell_type": "code", "source": [ "%%capture\n", "import os\n", "\n", "# Get benchmarks script\n", "os.system(\"wget https://raw.githubusercontent.com/neuml/txtai/master/examples/benchmarks.py\")\n", "\n", "# Create output directory\n", "os.makedirs(\"beir\", exist_ok=True)\n", "\n", "# Download subset of BEIR datasets\n", "datasets = [\"trec-covid\", \"nfcorpus\", \"webis-touche2020\", \"scidocs\", \"scifact\"]\n", "for dataset in datasets:\n", " url = f\"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{dataset}.zip\"\n", " os.system(f\"wget {url}\")\n", " os.system(f\"mv {dataset}.zip beir\")\n", " os.system(f\"unzip -d beir beir/{dataset}.zip\")\n", "\n", " # Remove existing benchmark data\n", "if os.path.exists(\"benchmarks.json\"):\n", " os.remove(\"benchmarks.json\")" ], "metadata": { "id": "IGKzkKWB60pg" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Now let's run the benchmarks." 
], "metadata": { "id": "SEH7Og8LiWRd" } }, { "cell_type": "code", "source": [ "# Remove existing benchmark data\n", "if os.path.exists(\"benchmarks.json\"):\n", " os.remove(\"benchmarks.json\")\n", "\n", "# Runs benchmark evaluation\n", "def evaluate(method):\n", " for dataset in datasets:\n", " command = f\"python benchmarks.py beir {dataset} {method}\"\n", " print(command)\n", " os.system(command)\n", "\n", "# Calculate benchmarks\n", "for method in [\"bm25\", \"rank\", \"sqlite\"]:\n", " evaluate(method)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Hfpok07_5N1m", "outputId": "190d6821-7ff2-4c25-d8ef-5c0ec0b6b04f" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "python benchmarks.py beir trec-covid bm25\n", "python benchmarks.py beir nfcorpus bm25\n", "python benchmarks.py beir webis-touche2020 bm25\n", "python benchmarks.py beir scidocs bm25\n", "python benchmarks.py beir scifact bm25\n", "python benchmarks.py beir trec-covid rank\n", "python benchmarks.py beir nfcorpus rank\n", "python benchmarks.py beir webis-touche2020 rank\n", "python benchmarks.py beir scidocs rank\n", "python benchmarks.py beir scifact rank\n", "python benchmarks.py beir trec-covid sqlite\n", "python benchmarks.py beir nfcorpus sqlite\n", "python benchmarks.py beir webis-touche2020 sqlite\n", "python benchmarks.py beir scidocs sqlite\n", "python benchmarks.py beir scifact sqlite\n" ] } ] }, { "cell_type": "code", "source": [ "import json\n", "import pandas as pd\n", "\n", "def benchmarks():\n", " # Read JSON lines data\n", " with open(\"benchmarks.json\") as f:\n", " data = f.read()\n", "\n", " df = pd.read_json(data, lines=True).sort_values(by=[\"source\", \"search\"])\n", " return df[[\"source\", \"method\", \"index\", \"memory\", \"search\", \"ndcg_cut_10\", \"map_cut_10\", \"recall_10\", \"P_10\"]].reset_index(drop=True)\n", "\n", "# Load benchmarks dataframe\n", "df = benchmarks()" ], "metadata": { "id": 
"cpmNpwag73DW" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "df[df.source == \"trec-covid\"].reset_index(drop=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "id": "ln4oUAfgLas4", "outputId": "3e3249df-600a-444a-c46e-b839d215ef83" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " source method index memory search ndcg_cut_10 map_cut_10 \\\n", "0 trec-covid bm25 101.96 997 0.28 0.58119 0.01247 \n", "1 trec-covid sqlite 60.16 880 23.09 0.56778 0.01190 \n", "2 trec-covid rank 61.75 3245 75.49 0.57773 0.01210 \n", "\n", " recall_10 P_10 \n", "0 0.01545 0.618 \n", "1 0.01519 0.610 \n", "2 0.01550 0.632 " ], "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sourcemethodindexmemorysearchndcg_cut_10map_cut_10recall_10P_10
0trec-covidbm25101.969970.280.581190.012470.015450.618
1trec-covidsqlite60.1688023.090.567780.011900.015190.610
2trec-covidrank61.75324575.490.577730.012100.015500.632
\n", "
\n", " \n", "\n", "\n", "\n", "
\n", " \n", "
\n", "\n", "\n", "\n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 12 } ] }, { "cell_type": "code", "source": [ "df[df.source == \"nfcorpus\"].reset_index(drop=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "id": "bSx6dXhLM66g", "outputId": "504f47de-e2ca-4837-a158-4f0f0704f08b" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " source method index memory search ndcg_cut_10 map_cut_10 \\\n", "0 nfcorpus bm25 2.64 648 1.08 0.30639 0.11728 \n", "1 nfcorpus sqlite 1.50 630 12.73 0.30695 0.11785 \n", "2 nfcorpus rank 2.75 700 23.78 0.30692 0.11711 \n", "\n", " recall_10 P_10 \n", "0 0.14891 0.21734 \n", "1 0.14871 0.21641 \n", "2 0.15320 0.21889 " ], "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sourcemethodindexmemorysearchndcg_cut_10map_cut_10recall_10P_10
0nfcorpusbm252.646481.080.306390.117280.148910.21734
1nfcorpussqlite1.5063012.730.306950.117850.148710.21641
2nfcorpusrank2.7570023.780.306920.117110.153200.21889
\n", "
\n", " \n", "\n", "\n", "\n", "
\n", " \n", "
\n", "\n", "\n", "\n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 13 } ] }, { "cell_type": "code", "source": [ "df[df.source == \"webis-touche2020\"].reset_index(drop=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "id": "W-hAhuYHNK_6", "outputId": "6c28d842-f572-454b-86f3-a19783640757" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " source method index memory search ndcg_cut_10 map_cut_10 \\\n", "0 webis-touche2020 bm25 374.66 1137 0.37 0.36920 0.14588 \n", "1 webis-touche2020 sqlite 220.46 1416 34.61 0.37194 0.14812 \n", "2 webis-touche2020 rank 224.07 10347 81.22 0.39861 0.16492 \n", "\n", " recall_10 P_10 \n", "0 0.22736 0.34694 \n", "1 0.22890 0.35102 \n", "2 0.23770 0.36122 " ], "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sourcemethodindexmemorysearchndcg_cut_10map_cut_10recall_10P_10
0webis-touche2020bm25374.6611370.370.369200.145880.227360.34694
1webis-touche2020sqlite220.46141634.610.371940.148120.228900.35102
2webis-touche2020rank224.071034781.220.398610.164920.237700.36122
\n", "
\n", " \n", "\n", "\n", "\n", "
\n", " \n", "
\n", "\n", "\n", "\n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 14 } ] }, { "cell_type": "code", "source": [ "df[df.source == \"scidocs\"].reset_index(drop=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "id": "ln7p-b9XNPmO", "outputId": "26a53b7f-a047-4062-b7a9-45d0b27dbef9" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " source method index memory search ndcg_cut_10 map_cut_10 recall_10 \\\n", "0 scidocs bm25 17.95 717 1.64 0.15063 0.08756 0.15637 \n", "1 scidocs sqlite 17.85 670 56.64 0.15156 0.08822 0.15717 \n", "2 scidocs rank 13.11 1056 162.99 0.14932 0.08670 0.15408 \n", "\n", " P_10 \n", "0 0.0772 \n", "1 0.0776 \n", "2 0.0761 " ], "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sourcemethodindexmemorysearchndcg_cut_10map_cut_10recall_10P_10
0scidocsbm2517.957171.640.150630.087560.156370.0772
1scidocssqlite17.8567056.640.151560.088220.157170.0776
2scidocsrank13.111056162.990.149320.086700.154080.0761
\n", "
\n", " \n", "\n", "\n", "\n", "
\n", " \n", "
\n", "\n", "\n", "\n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 15 } ] }, { "cell_type": "code", "source": [ "df[df.source == \"scifact\"].reset_index(drop=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "id": "CsHEwmV0NTjm", "outputId": "591030c6-57fb-4f06-c133-9ab0fef3646e" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " source method index memory search ndcg_cut_10 map_cut_10 recall_10 \\\n", "0 scifact bm25 5.51 653 1.07 0.66324 0.61764 0.78761 \n", "1 scifact sqlite 1.85 631 20.28 0.66630 0.61966 0.79494 \n", "2 scifact rank 1.85 724 42.22 0.65618 0.61204 0.77400 \n", "\n", " P_10 \n", "0 0.087 \n", "1 0.088 \n", "2 0.085 " ], "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sourcemethodindexmemorysearchndcg_cut_10map_cut_10recall_10P_10
0scifactbm255.516531.070.663240.617640.787610.087
1scifactsqlite1.8563120.280.666300.619660.794940.088
2scifactrank1.8572442.220.656180.612040.774000.085
\n", "
\n", " \n", "\n", "\n", "\n", "
\n", " \n", "
\n", "\n", "\n", "\n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 16 } ] }, { "cell_type": "markdown", "source": [ "The sections above show the metrics per source and method.\n", "\n", "The table headers list the `source (dataset)`, `index method`, `index time(s)`, `memory usage(MB)`, `search time(s)` and `NDCG@10`/`MAP@10`/`RECALL@10`/`P@10` accuracy metrics. The tables are sorted by `search time`.\n", "\n", "As we can see, txtai's implementation has the fastest search times across the board. But it is slower when it comes to index time. The accuracy metrics vary slightly but are all about the same per method.\n", "\n", "Memory usage stands out. SQLite and txtai both have around the same usage per source. Rank-BM25 memory usage can get out of hand fast. For example, `webis-touche2020`, which is only ~400K records, uses `10 GB` of memory compared to `1.1 GB` and `1.4 GB` for the other implementations." ], "metadata": { "id": "tU1eFDZUh0NQ" } }, { "cell_type": "markdown", "source": [ "# Compare with Elasticsearch\n", "\n", "Now that we've reviewed methods to build keyword indexes in Python, let's see how txtai's sparse keyword index compares to Elasticsearch.\n", "\n", "We'll spin up an inline instance and run the same evaluations."
], "metadata": { "id": "_9tn39MN0LV9" } }, { "cell_type": "code", "source": [ "%%capture\n", "# Download and extract elasticsearch\n", "os.system(\"wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.10.1-linux-x86_64.tar.gz\")\n", "os.system(\"tar -xzf elasticsearch-7.10.1-linux-x86_64.tar.gz\")\n", "os.system(\"chown -R daemon:daemon elasticsearch-7.10.1\")" ], "metadata": { "id": "GZu0nj_R_NqB" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from subprocess import Popen, PIPE, STDOUT\n", "\n", "# Start and wait for server\n", "server = Popen(['elasticsearch-7.10.1/bin/elasticsearch'], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1))\n", "!sleep 30" ], "metadata": { "id": "SsQsr-my_Poy" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Add benchmark evaluations for Elasticsearch\n", "evaluate(\"es\")\n", "\n", "# Reload benchmarks dataframe\n", "df = benchmarks()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QSnpA2sjA5X0", "outputId": "d4805ee9-1e3c-4fec-8f73-9b30edd4854d" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "python benchmarks.py beir trec-covid es\n", "python benchmarks.py beir nfcorpus es\n", "python benchmarks.py beir webis-touche2020 es\n", "python benchmarks.py beir scidocs es\n", "python benchmarks.py beir scifact es\n" ] } ] }, { "cell_type": "code", "source": [ "df[df.source == \"trec-covid\"].reset_index(drop=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 175 }, "id": "zAZolShYaXyf", "outputId": "1a338a45-799c-483c-83a4-037b8e8c1780" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " source method index memory search ndcg_cut_10 map_cut_10 \\\n", "0 trec-covid bm25 101.96 997 0.28 0.58119 0.01247 \n", "1 trec-covid es 71.24 636 2.09 0.59215 0.01261 \n", "2 trec-covid sqlite 
60.16 880 23.09 0.56778 0.01190 \n", "3 trec-covid rank 61.75 3245 75.49 0.57773 0.01210 \n", "\n", " recall_10 P_10 \n", "0 0.01545 0.618 \n", "1 0.01590 0.636 \n", "2 0.01519 0.610 \n", "3 0.01550 0.632 " ], "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sourcemethodindexmemorysearchndcg_cut_10map_cut_10recall_10P_10
0trec-covidbm25101.969970.280.581190.012470.015450.618
1trec-covides71.246362.090.592150.012610.015900.636
2trec-covidsqlite60.1688023.090.567780.011900.015190.610
3trec-covidrank61.75324575.490.577730.012100.015500.632
\n", "
\n", " \n", "\n", "\n", "\n", "
\n", " \n", "
\n", "\n", "\n", "\n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 20 } ] }, { "cell_type": "code", "source": [ "df[df.source == \"nfcorpus\"].reset_index(drop=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 175 }, "id": "3kKe6A6CbKbp", "outputId": "5c8ca9cb-0d59-4110-eacf-547bba8f1445" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " source method index memory search ndcg_cut_10 map_cut_10 \\\n", "0 nfcorpus bm25 2.64 648 1.08 0.30639 0.11728 \n", "1 nfcorpus es 3.95 627 11.47 0.30676 0.11761 \n", "2 nfcorpus sqlite 1.50 630 12.73 0.30695 0.11785 \n", "3 nfcorpus rank 2.75 700 23.78 0.30692 0.11711 \n", "\n", " recall_10 P_10 \n", "0 0.14891 0.21734 \n", "1 0.14894 0.21610 \n", "2 0.14871 0.21641 \n", "3 0.15320 0.21889 " ], "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sourcemethodindexmemorysearchndcg_cut_10map_cut_10recall_10P_10
0nfcorpusbm252.646481.080.306390.117280.148910.21734
1nfcorpuses3.9562711.470.306760.117610.148940.21610
2nfcorpussqlite1.5063012.730.306950.117850.148710.21641
3nfcorpusrank2.7570023.780.306920.117110.153200.21889
\n", "
\n", " \n", "\n", "\n", "\n", "
\n", " \n", "
\n", "\n", "\n", "\n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 21 } ] }, { "cell_type": "code", "source": [ "df[df.source == \"webis-touche2020\"].reset_index(drop=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 175 }, "id": "wKCYo54hbVUC", "outputId": "2314f23c-1ed6-4f77-db4d-c579f163878b" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " source method index memory search ndcg_cut_10 map_cut_10 \\\n", "0 webis-touche2020 bm25 374.66 1137 0.37 0.36920 0.14588 \n", "1 webis-touche2020 es 168.28 629 0.62 0.37519 0.14819 \n", "2 webis-touche2020 sqlite 220.46 1416 34.61 0.37194 0.14812 \n", "3 webis-touche2020 rank 224.07 10347 81.22 0.39861 0.16492 \n", "\n", " recall_10 P_10 \n", "0 0.22736 0.34694 \n", "1 0.22889 0.35102 \n", "2 0.22890 0.35102 \n", "3 0.23770 0.36122 " ], "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sourcemethodindexmemorysearchndcg_cut_10map_cut_10recall_10P_10
0webis-touche2020bm25374.6611370.370.369200.145880.227360.34694
1webis-touche2020es168.286290.620.375190.148190.228890.35102
2webis-touche2020sqlite220.46141634.610.371940.148120.228900.35102
3webis-touche2020rank224.071034781.220.398610.164920.237700.36122
\n", "
\n", " \n", "\n", "\n", "\n", "
\n", " \n", "
\n", "\n", "\n", "\n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 22 } ] }, { "cell_type": "code", "source": [ "df[df.source == \"scidocs\"].reset_index(drop=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 175 }, "id": "yt5j8wF1bNka", "outputId": "5f2d0506-3578-47e4-abde-1cb6736fc1d5" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " source method index memory search ndcg_cut_10 map_cut_10 recall_10 \\\n", "0 scidocs bm25 17.95 717 1.64 0.15063 0.08756 0.15637 \n", "1 scidocs es 11.07 632 10.25 0.14924 0.08671 0.15497 \n", "2 scidocs sqlite 17.85 670 56.64 0.15156 0.08822 0.15717 \n", "3 scidocs rank 13.11 1056 162.99 0.14932 0.08670 0.15408 \n", "\n", " P_10 \n", "0 0.0772 \n", "1 0.0765 \n", "2 0.0776 \n", "3 0.0761 " ], "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sourcemethodindexmemorysearchndcg_cut_10map_cut_10recall_10P_10
0scidocsbm2517.957171.640.150630.087560.156370.0772
1scidocses11.0763210.250.149240.086710.154970.0765
2scidocssqlite17.8567056.640.151560.088220.157170.0776
3scidocsrank13.111056162.990.149320.086700.154080.0761
\n", "
\n", " \n", "\n", "\n", "\n", "
\n", " \n", "
\n", "\n", "\n", "\n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 23 } ] }, { "cell_type": "code", "source": [ "df[df.source == \"scifact\"].reset_index(drop=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 175 }, "id": "7o3RVNt2bQFZ", "outputId": "6723c801-b485-45b3-e619-ca5ddcf8e7c3" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " source method index memory search ndcg_cut_10 map_cut_10 recall_10 \\\n", "0 scifact bm25 5.51 653 1.07 0.66324 0.61764 0.78761 \n", "1 scifact es 2.90 625 9.62 0.66058 0.61518 0.78428 \n", "2 scifact sqlite 1.85 631 20.28 0.66630 0.61966 0.79494 \n", "3 scifact rank 1.85 724 42.22 0.65618 0.61204 0.77400 \n", "\n", " P_10 \n", "0 0.08700 \n", "1 0.08667 \n", "2 0.08800 \n", "3 0.08500 " ], "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sourcemethodindexmemorysearchndcg_cut_10map_cut_10recall_10P_10
0scifactbm255.516531.070.663240.617640.787610.08700
1scifactes2.906259.620.660580.615180.784280.08667
2scifactsqlite1.8563120.280.666300.619660.794940.08800
3scifactrank1.8572442.220.656180.612040.774000.08500
\n", "
\n", " \n", "\n", "\n", "\n", "
\n", " \n", "
\n", "\n", "\n", "\n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 24 } ] }, { "cell_type": "markdown", "source": [ "Once again txtai's implementation compares well with Elasticsearch. The accuracy metrics vary but are all about the same.\n", "\n", "It's important to note that in internal testing with solid state storage, Elasticsearch and txtai's speed is about the same. These times for Elasticsearch being a little slower are a product of running in a Google Colab environment." ], "metadata": { "id": "1INPBYQ2lf22" } }, { "cell_type": "markdown", "source": [ "# Wrapping up\n", "\n", "This notebook showed how to build an efficient sparse keyword index in Python. The benchmarks show that txtai provides a strong implementation both from an accuracy and speed standpoint, on par with Apache Lucene.\n", "\n", "This keyword index can be used as a standalone index in Python or in combination with dense vector indexes to form a `hybrid` index." ], "metadata": { "id": "f41NSYWc0dsy" } } ] } ================================================ FILE: examples/48_Benefits_of_hybrid_search.ipynb ================================================ [File too large to display: 63.0 KB] ================================================ FILE: examples/49_External_database_integration.ipynb ================================================ [File too large to display: 30.7 KB] ================================================ FILE: examples/50_All_about_vector_quantization.ipynb ================================================ [File too large to display: 45.0 KB] ================================================ FILE: examples/51_Custom_API_Endpoints.ipynb ================================================ [File too large to display: 12.1 KB] ================================================ FILE: examples/52_Build_RAG_pipelines_with_txtai.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "VGeVB8M41jqW" }, "source": [ "# Build RAG pipelines with txtai\n", 
"\n", "Large Language Models (LLMs) have completely dominated the tech space in recent years. The results have been amazing and the public imagination is almost endless.\n", "\n", "While LLMs have been impressive, they are not problem free. The biggest challenge is with hallucinations. Hallucinations is the term for when a LLM generates output that is factually incorrect. The alarming part of this is that on a cursory glance, it actually sounds like good content. The default behavior of LLMs is to produce plausible answers even when no plausible answer exists. LLMs are not great at saying I don't know.\n", "\n", "Retrieval augmented generation (RAG) helps reduce the risk of hallucinations by limiting the context in which a LLM can generate answers. This is typically done with a vector search query that hydrates a prompt with a relevant context. RAG is one of the most practical and production-ready use cases for *Generative AI*. It's so popular now, that some are creating their entire companies around it.\n", "\n", "[txtai](https://github.com/neuml/txtai) has long had question-answering pipelines, which employ the same process of retrieving a relevant context. LLMs are now the preferred approach for analyzing that context and RAG pipelines are one of the main features of txtai. One of the other main features of txtai is that it's a vector database! You can build your prompts and limit your context all with one library. Hence the phrase *all-in-one AI framework*.\n", "\n", "This notebook shows how to build RAG pipelines with txtai." ] }, { "cell_type": "markdown", "metadata": { "id": "ZQrHIw351lwE" }, "source": [ "# Install dependencies\n", "\n", "Install `txtai` and all dependencies. Since this notebook is using optional pipelines, we need to install the pipeline extras package." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "R0AqRP7v1hdr" }, "outputs": [], "source": [ "%%capture\n", "!pip install git+https://github.com/neuml/txtai#egg=txtai[pipeline]\n", "\n", "# Get test data\n", "!wget -N https://github.com/neuml/txtai/releases/download/v6.2.0/tests.tar.gz\n", "!tar -xvzf tests.tar.gz\n", "\n", "# Install NLTK\n", "import nltk\n", "nltk.download(['punkt', 'punkt_tab'])" ] }, { "cell_type": "markdown", "metadata": { "id": "xmPN8RDF1pXd" }, "source": [ "# Start with the basics\n", "\n", "Let's jump right in and start with a simple LLM pipeline. The [LLM pipeline](https://neuml.github.io/txtai/pipeline/llm/llm/) supports local LLM models via [Hugging Face Transformers](https://github.com/huggingface/transformers) and [llama.cpp](https://github.com/abetlen/llama-cpp-python).\n", "\n", "The LLM pipeline also supports [API services (e.g. OpenAI, Claude, Bedrock etc) via LiteLLM](https://github.com/BerriAI/litellm). The LLM pipeline automatically detects the underlying LLM framework from the `path` parameter.\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "XZ7vPBIs1rGZ" }, "outputs": [], "source": [ "from txtai import LLM\n", "\n", "# Create LLM\n", "llm = LLM(\"Qwen/Qwen3-4B-Instruct-2507\")" ] }, { "cell_type": "markdown", "metadata": { "id": "9rmTWMxAH3Vx" }, "source": [ "Next, we'll load a document to query. The [Textractor pipeline](https://neuml.github.io/txtai/pipeline/data/textractor/) has support for extracting text from common document formats (docx, pdf, xlsx, web)."
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nifStGtOHuyc", "outputId": "5a4010e0-75f9-4095-a24c-cd4c859847d0" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "# txtai – the all-in-one embeddings database\n", "txtai is an all-in-one embeddings database for semantic search, LLM orchestration and language model workflows.\n", "\n", "Summary of txtai features:\n", "· *Vector search* with SQL, object storage, topic modeling\n", "· Create *embeddings* for text, documents, audio, images and video\n", "· *Pipelines* powered by language models that run LLM prompts\n", "· *Workflows* to join pipelines together and aggregate business logic\n", "· Build with *Python* or *YAML* . API bindings available for JavaScript, Java, Rust and Go.\n", "· *Run local or scale out with container orchestration* \n", "\n", "\n", "## Examples\n", "List of example notebooks.\n", "|Notebook|Description|\n", "|---|---|\n", "|Introducing txtai |Overview of the functionality provided by txtai|\n", "|Similarity search with images|Embed images and text into the same space for search|\n", "|Build a QA database|Question matching with semantic search|\n", "|Semantic Graphs|Explore topics, data connectivity and run network analysis|\n", "\n", "## Install\n", "The easiest way to install is via pip and PyPI\n", "pip install txtai\n", "Python 3.10+ is supported. 
Using a Python virtual environment is **recommended** .\n", "See the detailed install instructions for more information covering optional dependencies, environment specific prerequisites, installing from source, conda support and how to run with containers.\n", "\n", "\n", "\n", "\n", "\n", "## Model guide\n", "The following shows a list of suggested models.\n", "|Component|Model(s)|\n", "|---|---|\n", "|Embeddings|all-MiniLM-L6-v2|\n", "||E5-base-v2|\n", "|Image Captions|BLIP|\n", "|Labels - Zero Shot|BART-Large-MNLI|\n", "|Labels - Fixed|Fine-tune with training pipeline|\n", "|Large Language Model (LLM)|Flan T5 XL|\n", "||Mistral 7B OpenOrca|\n", "|Summarization|DistilBART|\n", "|Text-to-Speech|ESPnet JETS|\n", "|Transcription|Whisper|\n", "|Translation|OPUS Model Series|\n" ] } ], "source": [ "from txtai import Textractor\n", "\n", "# Create Textractor\n", "textractor = Textractor()\n", "text = textractor(\"txtai/document.docx\")\n", "print(text)" ] }, { "cell_type": "markdown", "metadata": { "id": "2jkamwgdIgEp" }, "source": [ "Now we'll define a simple LLM pipeline. It takes a question and context (which in this case is the whole file), creates a prompt and runs it with the LLM." 
] }, { "cell_type": "code", "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 59 }, "id": "9HU6C0OIIAKn", "outputId": "f9d556c4-cd7a-4774-ef62-1f3fff90aa47" }, "outputs": [ { "data": { "text/plain": [ "'txtai is an all-in-one embeddings database that supports semantic search, LLM orchestration, and language model workflows with features like vector search, embeddings for text, audio, images, and video, pipelines powered by language models, and scalable workflows available via Python or YAML with API bindings for multiple languages.'" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def execute(question, text):\n", " return llm([\n", " {\"role\": \"system\", \"content\": \"You are a friendly assistant. You answer questions from users.\"},\n", " {\"role\": \"user\", \"content\": f\"\"\"\n", " Answer the following question using only the context below. Only include information specifically discussed.\n", "\n", " question: {question}\n", " context: {text} \n", " \"\"\"}\n", " ], maxlength=4096)\n", "\n", "execute(\"Tell me about txtai in one sentence\", text)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 39 }, "id": "xxF7ajCPJP5_", "outputId": "0e6f6dbb-c784-4841-fe3f-82754ef478eb" }, "outputs": [ { "data": { "text/plain": [ "'txtai recommends using Whisper for transcription.'" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "execute(\"What model does txtai recommend for transcription?\", text)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 97 }, "id": "AKmmTqsnJa5X", "outputId": "834bf3ee-b7ed-4e38-e2ef-d5950f99ed9a" }, "outputs": [ { "data": { "text/plain": [ "'The best thing to read if you don\\'t know anything about txtai would be the \"Introducing txtai\" notebook, as it 
provides an overview of the functionality offered by txtai.'" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "execute(\"I don't know anything about txtai, what would be the best thing to read?\", text)" ] }, { "cell_type": "markdown", "metadata": { "id": "WaVeEHrIMpFr" }, "source": [ "If this is the first time you've seen *Generative AI*, then these statements are 🤯. Even if you've been in the space a while, it's still amazing how much a language model can understand and the high level of quality in its answers.\n", "\n", "While this use case is fun, let's try to scale it to a larger set of documents." ] }, { "cell_type": "markdown", "metadata": { "id": "viVVft59NbKv" }, "source": [ "# Build a RAG pipeline with vector search\n", "\n", "Let's say we have a large number of documents, hundreds/thousands etc. We can't just put all those documents into a single prompt, we'll run out of GPU memory fast!\n", "\n", "This is where retrieval augmented generation enters the picture. We can use a query step that finds the best candidates to add to the prompt.\n", "\n", "Typically, this candidate query uses vector search but it can be anything that runs a search and returns results. In fact, many complex production systems have customized retrieval pipelines that feed a context into LLM prompts.\n", "\n", "The first step in building our RAG pipeline is creating the knowledge store. In this case, it's a vector database of file content. The files will be split into paragraphs with each paragraph stored as a separate row."
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ipmsmtN1NahT", "outputId": "64733e1f-fb7b-4a2d-bf02-8930478a8ee8" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Indexing txtai/article.pdf\n", "Indexing txtai/document.docx\n", "Indexing txtai/document.pdf\n", "Indexing txtai/spreadsheet.xlsx\n" ] } ], "source": [ "import os\n", "\n", "from txtai import Embeddings\n", "\n", "def stream(path):\n", " for f in sorted(os.listdir(path)):\n", " fpath = os.path.join(path, f)\n", "\n", " # Only accept documents\n", " if f.endswith((\"docx\", \"xlsx\", \"pdf\")):\n", " print(f\"Indexing {fpath}\")\n", " for paragraph in textractor(fpath):\n", " yield paragraph\n", "\n", "# Document text extraction, split into paragraphs\n", "textractor = Textractor(paragraphs=True)\n", "\n", "# Vector Database\n", "embeddings = Embeddings(content=True)\n", "embeddings.index(stream(\"txtai\"))" ] }, { "cell_type": "markdown", "metadata": { "id": "ASlmAaR3nBPN" }, "source": [ "The next step is defining the RAG pipeline. This pipeline takes the input question, runs a vector search and builds a context using the search results. The context is then inserted into a prompt template and run with the LLM." 
] }, { "cell_type": "code", "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 39 }, "id": "_-9SW6r4P5ha", "outputId": "6a7bcd69-bcd0-4f6e-81c1-e32c323b3ffb" }, "outputs": [ { "data": { "text/plain": [ "'txtai recommends using BLIP for image captioning.'" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def context(question):\n", " context = \"\\n\".join(x[\"text\"] for x in embeddings.search(question))\n", " return context\n", "\n", "def rag(question):\n", " return execute(question, context(question))\n", "\n", "rag(\"What model does txtai recommend for image captioning?\")" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "NbQhSunPQtB0", "outputId": "de0caf04-4cdb-48e8-aadf-37283be9909a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The BLIP model was added for image captioning on 2022-03-17.\n" ] } ], "source": [ "result = rag(\"When was the BLIP model added for image captioning?\")\n", "print(result)" ] }, { "cell_type": "markdown", "metadata": { "id": "D6HW-3GtnTFl" }, "source": [ "As we can see, the result is similar to what we had before without vector search. The difference is that we only used a relevant portion of the documents to generate the answer.\n", "\n", "As we discussed before, this is important when dealing with large volumes of data. Not all of the data can be added to a LLM prompt. Additionally, having only the most relevant context helps the LLM generate higher quality answers." ] }, { "cell_type": "markdown", "metadata": { "id": "FZ-yPC-xiUqa" }, "source": [ "# Citations for LLMs\n", "\n", "A healthy level of skepticism should be applied to answers generated by AI. We're far from the day where we can blindly trust answers from an AI model.\n", "\n", "txtai has a couple approaches for generating citations. 
The basic approach is to take the answer and search the vector database for the closest match." ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "obttSg_dSFT5", "outputId": "c7ae7675-6959-4bcb-ad06-065ea8609c31" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "E5-base-v2\n", "Image Captions BLIP\n", "Labels - Zero Shot BART-Large-MNLI\n", "# Model Guide\n", "|Component |Model(s)|Date Added|\n", "|---|---|---|\n", "|Embeddings |all-MiniLM-L6-v2|2022-04-15|\n", "|Image Captions |BLIP|2022-03-17|\n", "|Labels - Zero Shot |BART-Large-MNLI|2022-01-01|\n", "|Large Language Model (LLM) |Mistral 7B OpenOrca|2023-10-01|\n", "|Summarization |DistilBART|2021-02-22|\n", "|Text-to-Speech |ESPnet JETS|2022-08-01|\n", "|Transcription |Whisper|2022-08-01|\n", "|Translation |OPUS Model Series|2021-04-06|\n", "&\"Times New Roman,Regular\"&12&A\n", "## Model guide\n", "The following shows a list of suggested models.\n", "|Component|Model(s)|\n", "|---|---|\n", "|Embeddings|all-MiniLM-L6-v2|\n", "||E5-base-v2|\n", "|Image Captions|BLIP|\n", "|Labels - Zero Shot|BART-Large-MNLI|\n", "|Labels - Fixed|Fine-tune with training pipeline|\n", "|Large Language Model (LLM)|Flan T5 XL|\n", "||Mistral 7B OpenOrca|\n", "|Summarization|DistilBART|\n", "|Text-to-Speech|ESPnet JETS|\n", "|Transcription|Whisper|\n", "|Translation|OPUS Model Series|\n" ] } ], "source": [ "for x in embeddings.search(result):\n", " print(x[\"text\"])" ] }, { "cell_type": "markdown", "metadata": { "id": "MDcB7GCWY6TO" }, "source": [ "While the basic approach above works in this case, txtai has a more robust pipeline to handle citations and references.\n", "\n", "The RAG pipeline is defined below. A RAG pipeline works in the same way as a LLM + Vector Search pipeline, except it has special logic for generating citations. 
This pipeline takes the answer and compares it to the context passed to the LLM to determine the most likely reference." ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "id": "Lm6gg85_Y7ot" }, "outputs": [], "source": [ "from txtai import RAG\n", "\n", "# Create the RAG pipeline\n", "rag = RAG(embeddings, \"Qwen/Qwen3-4B-Instruct-2507\", template=\"\"\"\n", " Answer the following question using the provided context.\n", "\n", " Question:\n", " {question}\n", "\n", " Context:\n", " {context}\n", "\"\"\", output=\"reference\")" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "4pOfE5paZatH", "outputId": "2bed2de5-22ff-4f7b-dba5-41b8e4cc6c75" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ANSWER: Python 3.10 and later versions (Python 3.10+) are supported.\n", "CITATION: [{'id': '24', 'text': 'Python 3.10+ is supported. Using a Python virtual environment is recommended.'}]\n" ] } ], "source": [ "result = rag(\"What version of Python is supported?\", maxlength=4096)\n", "print(\"ANSWER:\", result[\"answer\"])\n", "print(\"CITATION:\", embeddings.search(\"select id, text from txtai where id = :id\", limit=1, parameters={\"id\": result[\"reference\"]}))" ] }, { "cell_type": "markdown", "metadata": { "id": "vHdE2Q59jNnF" }, "source": [ "And as we can see, not only is the answer to the statement shown, the RAG pipeline also provides a citation. This step is crucial in any line of work where answers must be verified (which is most lines of work)."
] }, { "cell_type": "markdown", "metadata": { "id": "oPwgCgBc2Er2" }, "source": [ "# Wrapping up\n", "\n", "This notebook introduced retrieval augmented generation (RAG), explained why we need it and showed the options available for running RAG pipelines with txtai.\n", "\n", "The advantages of building RAG pipelines with txtai are:\n", "\n", "- **All-in-one AI framework** - one library can handle LLM inference and vector search retrieval\n", "- **Generating citations** - generating answers is useful but referencing where those answers came from is crucial in gaining the trust of users\n", "- **Simple yet powerful** - building pipelines can be done in a small amount of Python. Options are available to build pipelines in YAML and/or run through the API" ] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "T4", "provenance": [] }, "kernelspec": { "display_name": "local", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.19" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: examples/53_Integrate_LLM_Frameworks.ipynb ================================================ [File too large to display: 12.7 KB] ================================================ FILE: examples/54_API_Authorization_and_Authentication.ipynb ================================================ [File too large to display: 14.8 KB] ================================================ FILE: examples/55_Generate_knowledge_with_Semantic_Graphs_and_RAG.ipynb ================================================ [File too large to display: 1.2 MB] ================================================ FILE: examples/56_External_vectorization.ipynb ================================================ [File too large to display: 9.8 KB] 
================================================ FILE: examples/57_Build_knowledge_graphs_with_LLM_driven_entity_extraction.ipynb ================================================ [File too large to display: 531.0 KB] ================================================ FILE: examples/58_Advanced_RAG_with_graph_path_traversal.ipynb ================================================ [File too large to display: 177.0 KB] ================================================ FILE: examples/59_Whats_new_in_txtai_7_0.ipynb ================================================ [File too large to display: 115.6 KB] ================================================ FILE: examples/60_Advanced_RAG_with_guided_generation.ipynb ================================================ [File too large to display: 12.1 KB] ================================================ FILE: examples/61_Integrate_txtai_with_Postgres.ipynb ================================================ [File too large to display: 215.3 KB] ================================================ FILE: examples/62_RAG_with_llama_cpp_and_external_API_services.ipynb ================================================ [File too large to display: 16.9 KB] ================================================ FILE: examples/63_How_RAG_with_txtai_works.ipynb ================================================ [File too large to display: 20.3 KB] ================================================ FILE: examples/64_Embeddings_index_format_for_open_data_access.ipynb ================================================ [File too large to display: 17.8 KB] ================================================ FILE: examples/65_Speech_to_Speech_RAG.ipynb ================================================ [File too large to display: 8.5 KB] ================================================ FILE: examples/66_Generative_Audio.ipynb ================================================ [File too large to display: 709.0 KB] ================================================ FILE: 
examples/67_Whats_new_in_txtai_8_0.ipynb ================================================ [File too large to display: 25.7 KB] ================================================ FILE: examples/68_Analyzing_Hugging_Face_Posts_with_Graphs_and_Agents.ipynb ================================================ [File too large to display: 1.3 MB] ================================================ FILE: examples/69_Granting_autonomy_to_agents.ipynb ================================================ [File too large to display: 30.4 KB] ================================================ FILE: examples/70_Getting_started_with_LLM_APIs.ipynb ================================================ [File too large to display: 42.5 KB] ================================================ FILE: examples/71_Analyzing_LinkedIn_Company_Posts_with_Graphs_and_Agents.ipynb ================================================ [File too large to display: 151.8 KB] ================================================ FILE: examples/72_Parsing_the_stars_with_txtai.ipynb ================================================ [File too large to display: 213.2 KB] ================================================ FILE: examples/73_Chunking_your_data_for_RAG.ipynb ================================================ [File too large to display: 47.5 KB] ================================================ FILE: examples/74_OpenAI_Compatible_API.ipynb ================================================ [File too large to display: 186.9 KB] ================================================ FILE: examples/75_Medical_RAG_Research_with_txtai.ipynb ================================================ [File too large to display: 16.8 KB] ================================================ FILE: examples/76_Whats_new_in_txtai_9_0.ipynb ================================================ [File too large to display: 12.6 KB] ================================================ FILE: examples/77_GraphRAG_with_Wikipedia_and_GPT_OSS.ipynb 
================================================ [File too large to display: 144.6 KB] ================================================ FILE: examples/78_Accessing_Low_Level_Vector_APIs.ipynb ================================================ [File too large to display: 17.1 KB] ================================================ FILE: examples/79_RAG_is_more_than_Vector_Search.ipynb ================================================ [File too large to display: 8.6 KB] ================================================ FILE: examples/80_Distilling_Knowledge_into_Tiny_LLMs.ipynb ================================================ [File too large to display: 9.6 KB] ================================================ FILE: examples/81_OpenCode_as_a_txtai_LLM.ipynb ================================================ [File too large to display: 9.9 KB] ================================================ FILE: examples/82_Agentic_College_Search.ipynb ================================================ [File too large to display: 37.3 KB] ================================================ FILE: examples/83_TxtAI_got_skills.ipynb ================================================ [File too large to display: 12.2 KB] ================================================ FILE: examples/84_Agent_Tools.ipynb ================================================ [File too large to display: 455.4 KB] ================================================ FILE: examples/agent_quickstart.py ================================================ [File too large to display: 2.1 KB] ================================================ FILE: examples/article.py ================================================ [File too large to display: 1.3 KB] ================================================ FILE: examples/baseball.py ================================================ [File too large to display: 20.3 KB] ================================================ FILE: examples/benchmarks.py ================================================ [File 
too large to display: 20.5 KB] ================================================ FILE: examples/books.py ================================================ [File too large to display: 5.3 KB] ================================================ FILE: examples/images.py ================================================ [File too large to display: 2.6 KB] ================================================ FILE: examples/rag_quickstart.py ================================================ [File too large to display: 2.0 KB] ================================================ FILE: examples/similarity.py ================================================ [File too large to display: 1.9 KB] ================================================ FILE: examples/wiki.py ================================================ [File too large to display: 1.8 KB] ================================================ FILE: examples/workflow_quickstart.py ================================================ [File too large to display: 1.5 KB] ================================================ FILE: examples/workflows.py ================================================ [File too large to display: 22.5 KB] ================================================ FILE: mkdocs.yml ================================================ [File too large to display: 4.9 KB] ================================================ FILE: pyproject.toml ================================================ [File too large to display: 31 B] ================================================ FILE: setup.py ================================================ [File too large to display: 4.6 KB] ================================================ FILE: src/python/txtai/__init__.py ================================================ [File too large to display: 364 B] ================================================ FILE: src/python/txtai/agent/__init__.py ================================================ [File too large to display: 233 B] 
================================================ FILE: src/python/txtai/agent/base.py ================================================ [File too large to display: 4.3 KB] ================================================ FILE: src/python/txtai/agent/factory.py ================================================ [File too large to display: 1002 B] ================================================ FILE: src/python/txtai/agent/model.py ================================================ [File too large to display: 3.2 KB] ================================================ FILE: src/python/txtai/agent/placeholder.py ================================================ [File too large to display: 340 B] ================================================ FILE: src/python/txtai/agent/tool/__init__.py ================================================ [File too large to display: 325 B] ================================================ FILE: src/python/txtai/agent/tool/bash.py ================================================ [File too large to display: 1.5 KB] ================================================ FILE: src/python/txtai/agent/tool/edit.py ================================================ [File too large to display: 1.5 KB] ================================================ FILE: src/python/txtai/agent/tool/embeddings.py ================================================ [File too large to display: 1.5 KB] ================================================ FILE: src/python/txtai/agent/tool/factory.py ================================================ [File too large to display: 5.4 KB] ================================================ FILE: src/python/txtai/agent/tool/function.py ================================================ [File too large to display: 1.1 KB] ================================================ FILE: src/python/txtai/agent/tool/glob.py ================================================ [File too large to display: 1.1 KB] ================================================ FILE: 
src/python/txtai/agent/tool/grep.py ================================================ [File too large to display: 2.0 KB] ================================================ FILE: src/python/txtai/agent/tool/read.py ================================================ [File too large to display: 1.4 KB] ================================================ FILE: src/python/txtai/agent/tool/skill.py ================================================ [File too large to display: 1.7 KB] ================================================ FILE: src/python/txtai/agent/tool/todo.py ================================================ [File too large to display: 945 B] ================================================ FILE: src/python/txtai/agent/tool/write.py ================================================ [File too large to display: 979 B] ================================================ FILE: src/python/txtai/ann/__init__.py ================================================ [File too large to display: 86 B] ================================================ FILE: src/python/txtai/ann/base.py ================================================ [File too large to display: 3.3 KB] ================================================ FILE: src/python/txtai/ann/dense/__init__.py ================================================ [File too large to display: 213 B] ================================================ FILE: src/python/txtai/ann/dense/annoy.py ================================================ [File too large to display: 1.8 KB] ================================================ FILE: src/python/txtai/ann/dense/factory.py ================================================ [File too large to display: 1.7 KB] ================================================ FILE: src/python/txtai/ann/dense/faiss.py ================================================ [File too large to display: 7.4 KB] ================================================ FILE: src/python/txtai/ann/dense/ggml.py 
================================================ [File too large to display: 15.9 KB] ================================================ FILE: src/python/txtai/ann/dense/hnsw.py ================================================ [File too large to display: 3.1 KB] ================================================ FILE: src/python/txtai/ann/dense/numpy.py ================================================ [File too large to display: 5.6 KB] ================================================ FILE: src/python/txtai/ann/dense/pgvector.py ================================================ [File too large to display: 9.1 KB] ================================================ FILE: src/python/txtai/ann/dense/sqlite.py ================================================ [File too large to display: 7.7 KB] ================================================ FILE: src/python/txtai/ann/dense/torch.py ================================================ [File too large to display: 6.6 KB] ================================================ FILE: src/python/txtai/ann/sparse/__init__.py ================================================ [File too large to display: 130 B] ================================================ FILE: src/python/txtai/ann/sparse/factory.py ================================================ [File too large to display: 1.2 KB] ================================================ FILE: src/python/txtai/ann/sparse/ivfsparse.py ================================================ [File too large to display: 11.7 KB] ================================================ FILE: src/python/txtai/ann/sparse/pgsparse.py ================================================ [File too large to display: 1.3 KB] ================================================ FILE: src/python/txtai/api/__init__.py ================================================ [File too large to display: 510 B] ================================================ FILE: src/python/txtai/api/application.py 
================================================ [File too large to display: 3.1 KB] ================================================ FILE: src/python/txtai/api/authorization.py ================================================ [File too large to display: 1.3 KB] ================================================ FILE: src/python/txtai/api/base.py ================================================ [File too large to display: 4.6 KB] ================================================ FILE: src/python/txtai/api/cluster.py ================================================ [File too large to display: 8.3 KB] ================================================ FILE: src/python/txtai/api/extension.py ================================================ [File too large to display: 369 B] ================================================ FILE: src/python/txtai/api/factory.py ================================================ [File too large to display: 640 B] ================================================ FILE: src/python/txtai/api/responses/__init__.py ================================================ [File too large to display: 153 B] ================================================ FILE: src/python/txtai/api/responses/factory.py ================================================ [File too large to display: 608 B] ================================================ FILE: src/python/txtai/api/responses/json.py ================================================ [File too large to display: 1.3 KB] ================================================ FILE: src/python/txtai/api/responses/messagepack.py ================================================ [File too large to display: 1007 B] ================================================ FILE: src/python/txtai/api/route.py ================================================ [File too large to display: 1.4 KB] ================================================ FILE: src/python/txtai/api/routers/__init__.py ================================================ [File 
too large to display: 505 B] ================================================ FILE: src/python/txtai/api/routers/agent.py ================================================ [File too large to display: 1.0 KB] ================================================ FILE: src/python/txtai/api/routers/caption.py ================================================ [File too large to display: 741 B] ================================================ FILE: src/python/txtai/api/routers/embeddings.py ================================================ [File too large to display: 7.8 KB] ================================================ FILE: src/python/txtai/api/routers/entity.py ================================================ [File too large to display: 807 B] ================================================ FILE: src/python/txtai/api/routers/extractor.py ================================================ [File too large to display: 658 B] ================================================ FILE: src/python/txtai/api/routers/labels.py ================================================ [File too large to display: 1.2 KB] ================================================ FILE: src/python/txtai/api/routers/llm.py ================================================ [File too large to display: 1.9 KB] ================================================ FILE: src/python/txtai/api/routers/objects.py ================================================ [File too large to display: 843 B] ================================================ FILE: src/python/txtai/api/routers/openai.py ================================================ [File too large to display: 5.3 KB] ================================================ FILE: src/python/txtai/api/routers/rag.py ================================================ [File too large to display: 1.9 KB] ================================================ FILE: src/python/txtai/api/routers/reranker.py ================================================ [File too large to display: 1.2 KB] 
================================================ FILE: src/python/txtai/api/routers/segmentation.py ================================================ [File too large to display: 769 B] ================================================ FILE: src/python/txtai/api/routers/similarity.py ================================================ [File too large to display: 1.2 KB] ================================================ FILE: src/python/txtai/api/routers/summary.py ================================================ [File too large to display: 1.2 KB] ================================================ FILE: src/python/txtai/api/routers/tabular.py ================================================ [File too large to display: 803 B] ================================================ FILE: src/python/txtai/api/routers/textractor.py ================================================ [File too large to display: 782 B] ================================================ FILE: src/python/txtai/api/routers/texttospeech.py ================================================ [File too large to display: 806 B] ================================================ FILE: src/python/txtai/api/routers/transcription.py ================================================ [File too large to display: 795 B] ================================================ FILE: src/python/txtai/api/routers/translation.py ================================================ [File too large to display: 1.2 KB] ================================================ FILE: src/python/txtai/api/routers/upload.py ================================================ [File too large to display: 731 B] ================================================ FILE: src/python/txtai/api/routers/workflow.py ================================================ [File too large to display: 593 B] ================================================ FILE: src/python/txtai/app/__init__.py ================================================ [File too large to display: 66 B] 
================================================ FILE: src/python/txtai/app/base.py ================================================ [File too large to display: 26.1 KB] ================================================ FILE: src/python/txtai/archive/__init__.py ================================================ [File too large to display: 160 B] ================================================ FILE: src/python/txtai/archive/base.py ================================================ [File too large to display: 2.8 KB] ================================================ FILE: src/python/txtai/archive/compress.py ================================================ [File too large to display: 1.0 KB] ================================================ FILE: src/python/txtai/archive/factory.py ================================================ [File too large to display: 429 B] ================================================ FILE: src/python/txtai/archive/tar.py ================================================ [File too large to display: 1.6 KB] ================================================ FILE: src/python/txtai/archive/zip.py ================================================ [File too large to display: 1.0 KB] ================================================ FILE: src/python/txtai/cloud/__init__.py ================================================ [File too large to display: 148 B] ================================================ FILE: src/python/txtai/cloud/base.py ================================================ [File too large to display: 2.4 KB] ================================================ FILE: src/python/txtai/cloud/factory.py ================================================ [File too large to display: 1.5 KB] ================================================ FILE: src/python/txtai/cloud/hub.py ================================================ [File too large to display: 3.6 KB] ================================================ FILE: src/python/txtai/cloud/storage.py 
================================================ [File too large to display: 3.9 KB] ================================================ FILE: src/python/txtai/console/__init__.py ================================================ [File too large to display: 51 B] ================================================ FILE: src/python/txtai/console/__main__.py ================================================ [File too large to display: 274 B] ================================================ FILE: src/python/txtai/console/base.py ================================================ [File too large to display: 6.4 KB] ================================================ FILE: src/python/txtai/data/__init__.py ================================================ [File too large to display: 190 B] ================================================ FILE: src/python/txtai/data/base.py ================================================ [File too large to display: 3.9 KB] ================================================ FILE: src/python/txtai/data/labels.py ================================================ [File too large to display: 1.2 KB] ================================================ FILE: src/python/txtai/data/questions.py ================================================ [File too large to display: 4.2 KB] ================================================ FILE: src/python/txtai/data/sequences.py ================================================ [File too large to display: 1.3 KB] ================================================ FILE: src/python/txtai/data/texts.py ================================================ [File too large to display: 3.8 KB] ================================================ FILE: src/python/txtai/data/tokens.py ================================================ [File too large to display: 579 B] ================================================ FILE: src/python/txtai/database/__init__.py ================================================ [File too large to display: 291 B] 
================================================ FILE: src/python/txtai/database/base.py ================================================ [File too large to display: 9.9 KB] ================================================ FILE: src/python/txtai/database/client.py ================================================ [File too large to display: 6.5 KB] ================================================ FILE: src/python/txtai/database/duckdb.py ================================================ [File too large to display: 5.3 KB] ================================================ FILE: src/python/txtai/database/embedded.py ================================================ [File too large to display: 1.8 KB] ================================================ FILE: src/python/txtai/database/encoder/__init__.py ================================================ [File too large to display: 159 B] ================================================ FILE: src/python/txtai/database/encoder/base.py ================================================ [File too large to display: 719 B] ================================================ FILE: src/python/txtai/database/encoder/factory.py ================================================ [File too large to display: 1.2 KB] ================================================ FILE: src/python/txtai/database/encoder/image.py ================================================ [File too large to display: 904 B] ================================================ FILE: src/python/txtai/database/encoder/serialize.py ================================================ [File too large to display: 605 B] ================================================ FILE: src/python/txtai/database/factory.py ================================================ [File too large to display: 1.7 KB] ================================================ FILE: src/python/txtai/database/rdbms.py ================================================ [File too large to display: 17.0 KB] 
================================================ FILE: src/python/txtai/database/schema/__init__.py ================================================ [File too large to display: 76 B] ================================================ FILE: src/python/txtai/database/schema/orm.py ================================================ [File too large to display: 2.3 KB] ================================================ FILE: src/python/txtai/database/schema/statement.py ================================================ [File too large to display: 3.1 KB] ================================================ FILE: src/python/txtai/database/sql/__init__.py ================================================ [File too large to display: 146 B] ================================================ FILE: src/python/txtai/database/sql/aggregate.py ================================================ [File too large to display: 5.2 KB] ================================================ FILE: src/python/txtai/database/sql/base.py ================================================ [File too large to display: 6.1 KB] ================================================ FILE: src/python/txtai/database/sql/expression.py ================================================ [File too large to display: 13.5 KB] ================================================ FILE: src/python/txtai/database/sql/token.py ================================================ [File too large to display: 8.7 KB] ================================================ FILE: src/python/txtai/database/sqlite.py ================================================ [File too large to display: 1.7 KB] ================================================ FILE: src/python/txtai/embeddings/__init__.py ================================================ [File too large to display: 100 B] ================================================ FILE: src/python/txtai/embeddings/base.py ================================================ [File too large to display: 33.9 KB] 
================================================ FILE: src/python/txtai/embeddings/index/__init__.py ================================================ [File too large to display: 333 B] ================================================ FILE: src/python/txtai/embeddings/index/action.py ================================================ [File too large to display: 152 B] ================================================ FILE: src/python/txtai/embeddings/index/autoid.py ================================================ [File too large to display: 2.1 KB] ================================================ FILE: src/python/txtai/embeddings/index/configuration.py ================================================ [File too large to display: 2.0 KB] ================================================ FILE: src/python/txtai/embeddings/index/documents.py ================================================ [File too large to display: 2.0 KB] ================================================ FILE: src/python/txtai/embeddings/index/functions.py ================================================ [File too large to display: 4.2 KB] ================================================ FILE: src/python/txtai/embeddings/index/indexes.py ================================================ [File too large to display: 4.9 KB] ================================================ FILE: src/python/txtai/embeddings/index/indexids.py ================================================ [File too large to display: 1.1 KB] ================================================ FILE: src/python/txtai/embeddings/index/reducer.py ================================================ [File too large to display: 2.8 KB] ================================================ FILE: src/python/txtai/embeddings/index/stream.py ================================================ [File too large to display: 2.1 KB] ================================================ FILE: src/python/txtai/embeddings/index/transform.py 
================================================ [File too large to display: 6.6 KB] ================================================ FILE: src/python/txtai/embeddings/search/__init__.py ================================================ [File too large to display: 221 B] ================================================ FILE: src/python/txtai/embeddings/search/base.py ================================================ [File too large to display: 9.9 KB] ================================================ FILE: src/python/txtai/embeddings/search/errors.py ================================================ [File too large to display: 138 B] ================================================ FILE: src/python/txtai/embeddings/search/explain.py ================================================ [File too large to display: 4.0 KB] ================================================ FILE: src/python/txtai/embeddings/search/hybrid.py ================================================ [File too large to display: 7.7 KB] ================================================ FILE: src/python/txtai/embeddings/search/ids.py ================================================ [File too large to display: 1.3 KB] ================================================ FILE: src/python/txtai/embeddings/search/query.py ================================================ [File too large to display: 1.7 KB] ================================================ FILE: src/python/txtai/embeddings/search/scan.py ================================================ [File too large to display: 5.9 KB] ================================================ FILE: src/python/txtai/embeddings/search/terms.py ================================================ [File too large to display: 1.1 KB] ================================================ FILE: src/python/txtai/graph/__init__.py ================================================ [File too large to display: 189 B] ================================================ FILE: 
src/python/txtai/graph/base.py ================================================ [File too large to display: 21.1 KB] ================================================ FILE: src/python/txtai/graph/factory.py ================================================ [File too large to display: 1.2 KB] ================================================ FILE: src/python/txtai/graph/networkx.py ================================================ [File too large to display: 8.2 KB] ================================================ FILE: src/python/txtai/graph/query.py ================================================ [File too large to display: 5.0 KB] ================================================ FILE: src/python/txtai/graph/rdbms.py ================================================ [File too large to display: 3.2 KB] ================================================ FILE: src/python/txtai/graph/topics.py ================================================ [File too large to display: 4.4 KB] ================================================ FILE: src/python/txtai/models/__init__.py ================================================ [File too large to display: 176 B] ================================================ FILE: src/python/txtai/models/models.py ================================================ [File too large to display: 7.8 KB] ================================================ FILE: src/python/txtai/models/onnx.py ================================================ [File too large to display: 3.6 KB] ================================================ FILE: src/python/txtai/models/pooling/__init__.py ================================================ [File too large to display: 175 B] ================================================ FILE: src/python/txtai/models/pooling/base.py ================================================ [File too large to display: 5.7 KB] ================================================ FILE: src/python/txtai/models/pooling/cls.py 
================================================ [File too large to display: 563 B] ================================================ FILE: src/python/txtai/models/pooling/factory.py ================================================ [File too large to display: 4.1 KB] ================================================ FILE: src/python/txtai/models/pooling/late.py ================================================ [File too large to display: 4.2 KB] ================================================ FILE: src/python/txtai/models/pooling/mean.py ================================================ [File too large to display: 801 B] ================================================ FILE: src/python/txtai/models/pooling/muvera.py ================================================ [File too large to display: 5.4 KB] ================================================ FILE: src/python/txtai/models/registry.py ================================================ [File too large to display: 1.3 KB] ================================================ FILE: src/python/txtai/models/tokendetection.py ================================================ [File too large to display: 4.5 KB] ================================================ FILE: src/python/txtai/pipeline/__init__.py ================================================ [File too large to display: 360 B] ================================================ FILE: src/python/txtai/pipeline/audio/__init__.py ================================================ [File too large to display: 274 B] ================================================ FILE: src/python/txtai/pipeline/audio/audiomixer.py ================================================ [File too large to display: 1.7 KB] ================================================ FILE: src/python/txtai/pipeline/audio/audiostream.py ================================================ [File too large to display: 2.4 KB] ================================================ FILE: src/python/txtai/pipeline/audio/microphone.py 
================================================ [File too large to display: 7.7 KB] ================================================ FILE: src/python/txtai/pipeline/audio/signal.py ================================================ [File too large to display: 5.0 KB] ================================================ FILE: src/python/txtai/pipeline/audio/texttoaudio.py ================================================ [File too large to display: 1.8 KB] ================================================ FILE: src/python/txtai/pipeline/audio/texttospeech.py ================================================ [File too large to display: 18.0 KB] ================================================ FILE: src/python/txtai/pipeline/audio/transcription.py ================================================ [File too large to display: 6.4 KB] ================================================ FILE: src/python/txtai/pipeline/base.py ================================================ [File too large to display: 536 B] ================================================ FILE: src/python/txtai/pipeline/data/__init__.py ================================================ [File too large to display: 233 B] ================================================ FILE: src/python/txtai/pipeline/data/filetohtml.py ================================================ [File too large to display: 4.9 KB] ================================================ FILE: src/python/txtai/pipeline/data/htmltomd.py ================================================ [File too large to display: 12.1 KB] ================================================ FILE: src/python/txtai/pipeline/data/segmentation.py ================================================ [File too large to display: 5.5 KB] ================================================ FILE: src/python/txtai/pipeline/data/tabular.py ================================================ [File too large to display: 4.0 KB] ================================================ FILE: 
src/python/txtai/pipeline/data/textractor.py ================================================ [File too large to display: 3.5 KB] ================================================ FILE: src/python/txtai/pipeline/data/tokenizer.py ================================================ [File too large to display: 6.6 KB] ================================================ FILE: src/python/txtai/pipeline/factory.py ================================================ [File too large to display: 1.7 KB] ================================================ FILE: src/python/txtai/pipeline/hfmodel.py ================================================ [File too large to display: 3.6 KB] ================================================ FILE: src/python/txtai/pipeline/hfpipeline.py ================================================ [File too large to display: 3.4 KB] ================================================ FILE: src/python/txtai/pipeline/image/__init__.py ================================================ [File too large to display: 114 B] ================================================ FILE: src/python/txtai/pipeline/image/caption.py ================================================ [File too large to display: 1.5 KB] ================================================ FILE: src/python/txtai/pipeline/image/imagehash.py ================================================ [File too large to display: 2.5 KB] ================================================ FILE: src/python/txtai/pipeline/image/objects.py ================================================ [File too large to display: 2.7 KB] ================================================ FILE: src/python/txtai/pipeline/llm/__init__.py ================================================ [File too large to display: 252 B] ================================================ FILE: src/python/txtai/pipeline/llm/factory.py ================================================ [File too large to display: 2.2 KB] ================================================ FILE: 
src/python/txtai/pipeline/llm/generation.py ================================================ [File too large to display: 7.5 KB] ================================================ FILE: src/python/txtai/pipeline/llm/huggingface.py ================================================ [File too large to display: 7.6 KB] ================================================ FILE: src/python/txtai/pipeline/llm/litellm.py ================================================ [File too large to display: 2.3 KB] ================================================ FILE: src/python/txtai/pipeline/llm/llama.py ================================================ [File too large to display: 4.2 KB] ================================================ FILE: src/python/txtai/pipeline/llm/llm.py ================================================ [File too large to display: 2.6 KB] ================================================ FILE: src/python/txtai/pipeline/llm/opencode.py ================================================ [File too large to display: 2.4 KB] ================================================ FILE: src/python/txtai/pipeline/llm/rag.py ================================================ [File too large to display: 17.8 KB] ================================================ FILE: src/python/txtai/pipeline/nop.py ================================================ [File too large to display: 189 B] ================================================ FILE: src/python/txtai/pipeline/tensors.py ================================================ [File too large to display: 1.0 KB] ================================================ FILE: src/python/txtai/pipeline/text/__init__.py ================================================ [File too large to display: 317 B] ================================================ FILE: src/python/txtai/pipeline/text/crossencoder.py ================================================ [File too large to display: 2.6 KB] ================================================ FILE: 
src/python/txtai/pipeline/text/entity.py ================================================ [File too large to display: 5.0 KB] ================================================ FILE: src/python/txtai/pipeline/text/labels.py ================================================ [File too large to display: 5.3 KB] ================================================ FILE: src/python/txtai/pipeline/text/lateencoder.py ================================================ [File too large to display: 3.1 KB] ================================================ FILE: src/python/txtai/pipeline/text/questions.py ================================================ [File too large to display: 1.4 KB] ================================================ FILE: src/python/txtai/pipeline/text/reranker.py ================================================ [File too large to display: 1.8 KB] ================================================ FILE: src/python/txtai/pipeline/text/similarity.py ================================================ [File too large to display: 3.0 KB] ================================================ FILE: src/python/txtai/pipeline/text/summary.py ================================================ [File too large to display: 2.8 KB] ================================================ FILE: src/python/txtai/pipeline/text/translation.py ================================================ [File too large to display: 9.2 KB] ================================================ FILE: src/python/txtai/pipeline/train/__init__.py ================================================ [File too large to display: 110 B] ================================================ FILE: src/python/txtai/pipeline/train/hfonnx.py ================================================ [File too large to display: 5.9 KB] ================================================ FILE: src/python/txtai/pipeline/train/hftrainer.py ================================================ [File too large to display: 13.7 KB] 
================================================ FILE: src/python/txtai/pipeline/train/mlonnx.py ================================================ [File too large to display: 2.0 KB] ================================================ FILE: src/python/txtai/scoring/__init__.py ================================================ [File too large to display: 268 B] ================================================ FILE: src/python/txtai/scoring/base.py ================================================ [File too large to display: 4.4 KB] ================================================ FILE: src/python/txtai/scoring/bm25.py ================================================ [File too large to display: 670 B] ================================================ FILE: src/python/txtai/scoring/factory.py ================================================ [File too large to display: 2.2 KB] ================================================ FILE: src/python/txtai/scoring/normalize.py ================================================ [File too large to display: 3.6 KB] ================================================ FILE: src/python/txtai/scoring/pgtext.py ================================================ [File too large to display: 5.8 KB] ================================================ FILE: src/python/txtai/scoring/sif.py ================================================ [File too large to display: 853 B] ================================================ FILE: src/python/txtai/scoring/sparse.py ================================================ [File too large to display: 6.8 KB] ================================================ FILE: src/python/txtai/scoring/terms.py ================================================ [File too large to display: 15.5 KB] ================================================ FILE: src/python/txtai/scoring/tfidf.py ================================================ [File too large to display: 10.5 KB] ================================================ FILE: 
src/python/txtai/serialize/__init__.py ================================================ [File too large to display: 227 B] ================================================ FILE: src/python/txtai/serialize/base.py ================================================ [File too large to display: 1.5 KB] ================================================ FILE: src/python/txtai/serialize/errors.py ================================================ [File too large to display: 114 B] ================================================ FILE: src/python/txtai/serialize/factory.py ================================================ [File too large to display: 598 B] ================================================ FILE: src/python/txtai/serialize/messagepack.py ================================================ [File too large to display: 1021 B] ================================================ FILE: src/python/txtai/serialize/pickle.py ================================================ [File too large to display: 3.1 KB] ================================================ FILE: src/python/txtai/serialize/serializer.py ================================================ [File too large to display: 1.1 KB] ================================================ FILE: src/python/txtai/util/__init__.py ================================================ [File too large to display: 133 B] ================================================ FILE: src/python/txtai/util/resolver.py ================================================ [File too large to display: 565 B] ================================================ FILE: src/python/txtai/util/sparsearray.py ================================================ [File too large to display: 1.3 KB] ================================================ FILE: src/python/txtai/util/template.py ================================================ [File too large to display: 349 B] ================================================ FILE: src/python/txtai/vectors/__init__.py 
================================================ [File too large to display: 125 B] ================================================ FILE: src/python/txtai/vectors/base.py ================================================ [File too large to display: 13.7 KB] ================================================ FILE: src/python/txtai/vectors/dense/__init__.py ================================================ [File too large to display: 277 B] ================================================ FILE: src/python/txtai/vectors/dense/external.py ================================================ [File too large to display: 1.5 KB] ================================================ FILE: src/python/txtai/vectors/dense/factory.py ================================================ [File too large to display: 3.1 KB] ================================================ FILE: src/python/txtai/vectors/dense/huggingface.py ================================================ [File too large to display: 1.3 KB] ================================================ FILE: src/python/txtai/vectors/dense/litellm.py ================================================ [File too large to display: 2.2 KB] ================================================ FILE: src/python/txtai/vectors/dense/llama.py ================================================ [File too large to display: 2.6 KB] ================================================ FILE: src/python/txtai/vectors/dense/m2v.py ================================================ [File too large to display: 1.6 KB] ================================================ FILE: src/python/txtai/vectors/dense/sbert.py ================================================ [File too large to display: 2.7 KB] ================================================ FILE: src/python/txtai/vectors/dense/words.py ================================================ [File too large to display: 6.2 KB] ================================================ FILE: src/python/txtai/vectors/recovery.py 
================================================ [File too large to display: 1.4 KB] ================================================ FILE: src/python/txtai/vectors/sparse/__init__.py ================================================ [File too large to display: 141 B] ================================================ FILE: src/python/txtai/vectors/sparse/base.py ================================================ [File too large to display: 2.8 KB] ================================================ FILE: src/python/txtai/vectors/sparse/factory.py ================================================ [File too large to display: 1.3 KB] ================================================ FILE: src/python/txtai/vectors/sparse/sbert.py ================================================ [File too large to display: 1020 B] ================================================ FILE: src/python/txtai/version.py ================================================ [File too large to display: 69 B] ================================================ FILE: src/python/txtai/workflow/__init__.py ================================================ [File too large to display: 139 B] ================================================ FILE: src/python/txtai/workflow/base.py ================================================ [File too large to display: 5.4 KB] ================================================ FILE: src/python/txtai/workflow/execute.py ================================================ [File too large to display: 2.8 KB] ================================================ FILE: src/python/txtai/workflow/factory.py ================================================ [File too large to display: 965 B] ================================================ FILE: src/python/txtai/workflow/task/__init__.py ================================================ [File too large to display: 461 B] ================================================ FILE: src/python/txtai/workflow/task/base.py 
================================================ [File too large to display: 14.4 KB] ================================================ FILE: src/python/txtai/workflow/task/console.py ================================================ [File too large to display: 492 B] ================================================ FILE: src/python/txtai/workflow/task/export.py ================================================ [File too large to display: 1.6 KB] ================================================ FILE: src/python/txtai/workflow/task/factory.py ================================================ [File too large to display: 2.1 KB] ================================================ FILE: src/python/txtai/workflow/task/file.py ================================================ [File too large to display: 560 B] ================================================ FILE: src/python/txtai/workflow/task/image.py ================================================ [File too large to display: 732 B] ================================================ FILE: src/python/txtai/workflow/task/retrieve.py ================================================ [File too large to display: 1.7 KB] ================================================ FILE: src/python/txtai/workflow/task/service.py ================================================ [File too large to display: 3.0 KB] ================================================ FILE: src/python/txtai/workflow/task/storage.py ================================================ [File too large to display: 3.2 KB] ================================================ FILE: src/python/txtai/workflow/task/stream.py ================================================ [File too large to display: 935 B] ================================================ FILE: src/python/txtai/workflow/task/template.py ================================================ [File too large to display: 3.4 KB] ================================================ FILE: src/python/txtai/workflow/task/url.py 
================================================ [File too large to display: 346 B] ================================================ FILE: src/python/txtai/workflow/task/workflow.py ================================================ [File too large to display: 232 B] ================================================ FILE: test/python/testagent.py ================================================ [File too large to display: 6.9 KB] ================================================ FILE: test/python/testann/__init__.py ================================================ ================================================ FILE: test/python/testann/testdense.py ================================================ [File too large to display: 14.1 KB] ================================================ FILE: test/python/testann/testsparse.py ================================================ [File too large to display: 4.2 KB] ================================================ FILE: test/python/testapi/__init__.py ================================================ ================================================ FILE: test/python/testapi/testapiagent.py ================================================ [File too large to display: 1.9 KB] ================================================ FILE: test/python/testapi/testapiembeddings.py ================================================ [File too large to display: 13.1 KB] ================================================ FILE: test/python/testapi/testapipipeline.py ================================================ [File too large to display: 10.6 KB] ================================================ FILE: test/python/testapi/testapiworkflow.py ================================================ [File too large to display: 6.5 KB] ================================================ FILE: test/python/testapi/testauthorization.py ================================================ [File too large to display: 1.7 KB] ================================================ 
FILE: test/python/testapi/testcluster.py ================================================ [File too large to display: 7.6 KB] ================================================ FILE: test/python/testapi/testencoding.py ================================================ [File too large to display: 4.8 KB] ================================================ FILE: test/python/testapi/testextension.py ================================================ [File too large to display: 2.3 KB] ================================================ FILE: test/python/testapi/testmcp.py ================================================ [File too large to display: 1.2 KB] ================================================ FILE: test/python/testapi/testopenai.py ================================================ [File too large to display: 5.8 KB] ================================================ FILE: test/python/testapp.py ================================================ [File too large to display: 2.0 KB] ================================================ FILE: test/python/testarchive.py ================================================ [File too large to display: 3.2 KB] ================================================ FILE: test/python/testcloud.py ================================================ [File too large to display: 5.4 KB] ================================================ FILE: test/python/testconsole.py ================================================ [File too large to display: 4.7 KB] ================================================ FILE: test/python/testdatabase/__init__.py ================================================ ================================================ FILE: test/python/testdatabase/testclient.py ================================================ [File too large to display: 2.0 KB] ================================================ FILE: test/python/testdatabase/testcustom.py ================================================ [File too large to display: 627 B] 
================================================ FILE: test/python/testdatabase/testdatabase.py ================================================ [File too large to display: 1.1 KB] ================================================ FILE: test/python/testdatabase/testduckdb.py ================================================ [File too large to display: 2.2 KB] ================================================ FILE: test/python/testdatabase/testencoder.py ================================================ [File too large to display: 4.6 KB] ================================================ FILE: test/python/testdatabase/testrdbms.py ================================================ [File too large to display: 32.5 KB] ================================================ FILE: test/python/testdatabase/testsql.py ================================================ [File too large to display: 14.3 KB] ================================================ FILE: test/python/testdatabase/testsqlite.py ================================================ [File too large to display: 2.0 KB] ================================================ FILE: test/python/testembeddings.py ================================================ [File too large to display: 20.1 KB] ================================================ FILE: test/python/testgraph.py ================================================ [File too large to display: 18.8 KB] ================================================ FILE: test/python/testmodels/__init__.py ================================================ ================================================ FILE: test/python/testmodels/testmodels.py ================================================ [File too large to display: 1.2 KB] ================================================ FILE: test/python/testmodels/testpooling.py ================================================ [File too large to display: 3.9 KB] ================================================ FILE: test/python/testoptional.py 
================================================ [File too large to display: 10.2 KB] ================================================ FILE: test/python/testpipeline/__init__.py ================================================ ================================================ FILE: test/python/testpipeline/testaudio/__init__.py ================================================ ================================================ FILE: test/python/testpipeline/testaudio/testaudiomixer.py ================================================ [File too large to display: 536 B] ================================================ FILE: test/python/testpipeline/testaudio/testaudiostream.py ================================================ [File too large to display: 690 B] ================================================ FILE: test/python/testpipeline/testaudio/testmicrophone.py ================================================ [File too large to display: 2.3 KB] ================================================ FILE: test/python/testpipeline/testaudio/testtexttoaudio.py ================================================ [File too large to display: 528 B] ================================================ FILE: test/python/testpipeline/testaudio/testtexttospeech.py ================================================ [File too large to display: 2.0 KB] ================================================ FILE: test/python/testpipeline/testaudio/testtranscription.py ================================================ [File too large to display: 3.0 KB] ================================================ FILE: test/python/testpipeline/testdata/__init__.py ================================================ ================================================ FILE: test/python/testpipeline/testdata/testfiletohtml.py ================================================ [File too large to display: 445 B] ================================================ FILE: test/python/testpipeline/testdata/testtabular.py 
================================================ [File too large to display: 2.9 KB] ================================================ FILE: test/python/testpipeline/testdata/testtextractor.py ================================================ [File too large to display: 7.1 KB] ================================================ FILE: test/python/testpipeline/testdata/testtokenizer.py ================================================ [File too large to display: 1.5 KB] ================================================ FILE: test/python/testpipeline/testimage/__init__.py ================================================ ================================================ FILE: test/python/testpipeline/testimage/testcaption.py ================================================ [File too large to display: 460 B] ================================================ FILE: test/python/testpipeline/testimage/testimagehash.py ================================================ [File too large to display: 1.5 KB] ================================================ FILE: test/python/testpipeline/testimage/testobjects.py ================================================ [File too large to display: 962 B] ================================================ FILE: test/python/testpipeline/testllm/__init__.py ================================================ ================================================ FILE: test/python/testpipeline/testllm/testgenerator.py ================================================ [File too large to display: 440 B] ================================================ FILE: test/python/testpipeline/testllm/testlitellm.py ================================================ [File too large to display: 3.0 KB] ================================================ FILE: test/python/testpipeline/testllm/testllama.py ================================================ [File too large to display: 2.3 KB] ================================================ FILE: test/python/testpipeline/testllm/testllm.py 
================================================ [File too large to display: 5.4 KB] ================================================ FILE: test/python/testpipeline/testllm/testopencode.py ================================================ [File too large to display: 1.5 KB] ================================================ FILE: test/python/testpipeline/testllm/testrag.py ================================================ [File too large to display: 6.9 KB] ================================================ FILE: test/python/testpipeline/testllm/testsequences.py ================================================ [File too large to display: 419 B] ================================================ FILE: test/python/testpipeline/testtext/__init__.py ================================================ ================================================ FILE: test/python/testpipeline/testtext/testentity.py ================================================ [File too large to display: 1.7 KB] ================================================ FILE: test/python/testpipeline/testtext/testlabels.py ================================================ [File too large to display: 2.7 KB] ================================================ FILE: test/python/testpipeline/testtext/testreranker.py ================================================ [File too large to display: 1.2 KB] ================================================ FILE: test/python/testpipeline/testtext/testsimilarity.py ================================================ [File too large to display: 3.4 KB] ================================================ FILE: test/python/testpipeline/testtext/testsummary.py ================================================ [File too large to display: 2.2 KB] ================================================ FILE: test/python/testpipeline/testtext/testtranslation.py ================================================ [File too large to display: 5.0 KB] ================================================ FILE: 
test/python/testpipeline/testtrain/__init__.py ================================================ ================================================ FILE: test/python/testpipeline/testtrain/testonnx.py ================================================ [File too large to display: 4.8 KB] ================================================ FILE: test/python/testpipeline/testtrain/testquantization.py ================================================ [File too large to display: 954 B] ================================================ FILE: test/python/testpipeline/testtrain/testtrainer.py ================================================ [File too large to display: 10.2 KB] ================================================ FILE: test/python/testscoring/__init__.py ================================================ ================================================ FILE: test/python/testscoring/testkeyword.py ================================================ [File too large to display: 14.8 KB] ================================================ FILE: test/python/testscoring/testsparse.py ================================================ [File too large to display: 6.6 KB] ================================================ FILE: test/python/testserialize.py ================================================ [File too large to display: 1.7 KB] ================================================ FILE: test/python/testvectors/__init__.py ================================================ ================================================ FILE: test/python/testvectors/testdense/__init__.py ================================================ ================================================ FILE: test/python/testvectors/testdense/testcustom.py ================================================ [File too large to display: 1.2 KB] ================================================ FILE: test/python/testvectors/testdense/testexternal.py ================================================ [File too large to display: 1.3 
KB] ================================================ FILE: test/python/testvectors/testdense/testhuggingface.py ================================================ [File too large to display: 3.0 KB] ================================================ FILE: test/python/testvectors/testdense/testlitellm.py ================================================ [File too large to display: 1.9 KB] ================================================ FILE: test/python/testvectors/testdense/testllama.py ================================================ [File too large to display: 927 B] ================================================ FILE: test/python/testvectors/testdense/testm2v.py ================================================ [File too large to display: 891 B] ================================================ FILE: test/python/testvectors/testdense/testsbert.py ================================================ [File too large to display: 1.7 KB] ================================================ FILE: test/python/testvectors/testdense/testvectors.py ================================================ [File too large to display: 1.7 KB] ================================================ FILE: test/python/testvectors/testdense/testwordvectors.py ================================================ [File too large to display: 4.6 KB] ================================================ FILE: test/python/testvectors/testsparse/__init__.py ================================================ ================================================ FILE: test/python/testvectors/testsparse/testsbert.py ================================================ [File too large to display: 903 B] ================================================ FILE: test/python/testvectors/testsparse/testvectors.py ================================================ [File too large to display: 1.1 KB] ================================================ FILE: test/python/testworkflow.py ================================================ [File too 
large to display: 18.7 KB] ================================================ FILE: test/python/utils.py ================================================ [File too large to display: 111 B]